Mirror of https://github.com/jomjol/AI-on-the-edge-device.git, synced 2025-12-08 20:46:52 +03:00
removed tflite-lib
CMakeLists.txt (deleted):
@@ -1,72 +0,0 @@
## TODO: GLOB is not a good way to collect files. Use explicit file list instead

cmake_minimum_required(VERSION 3.5)

set(tflite_dir "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow/lite")
set(tfmicro_dir "${tflite_dir}/micro")
set(tfmicro_frontend_dir "${tflite_dir}/experimental/microfrontend/lib")
set(tfmicro_kernels_dir "${tfmicro_dir}/kernels")

file(GLOB srcs_micro
    "${tfmicro_dir}/*.cc"
    "${tfmicro_dir}/*.c")

file(GLOB src_micro_frontend
    "${tfmicro_frontend_dir}/*.c"
    "${tfmicro_frontend_dir}/*.cc")
file(GLOB srcs_kernels
    "${tfmicro_kernels_dir}/*.c"
    "${tfmicro_kernels_dir}/*.cc")

# remove sources which will be provided by esp_nn
list(REMOVE_ITEM srcs_kernels
    "${tfmicro_kernels_dir}/add.cc"
    "${tfmicro_kernels_dir}/conv.cc"
    "${tfmicro_kernels_dir}/depthwise_conv.cc"
    "${tfmicro_kernels_dir}/fully_connected.cc"
    "${tfmicro_kernels_dir}/mul.cc"
    "${tfmicro_kernels_dir}/pooling.cc"
    "${tfmicro_kernels_dir}/softmax.cc")

FILE(GLOB esp_nn_kernels
    "${tfmicro_kernels_dir}/esp_nn/*.cc")

set(lib_srcs
    "${srcs_micro}"
    "${srcs_kernels}"
    "${esp_nn_kernels}"
    "${src_micro_frontend}"
    "${tflite_dir}/kernels/kernel_util.cc"
    "${tflite_dir}/micro/memory_planner/greedy_memory_planner.cc"
    "${tflite_dir}/micro/memory_planner/linear_memory_planner.cc"
    "${tflite_dir}/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc"
    "${tflite_dir}/micro/arena_allocator/persistent_arena_buffer_allocator.cc"
    "${tflite_dir}/micro/arena_allocator/recording_single_arena_buffer_allocator.cc"
    "${tflite_dir}/micro/arena_allocator/single_arena_buffer_allocator.cc"
    "${tflite_dir}/c/common.cc"
    "${tflite_dir}/core/api/error_reporter.cc"
    "${tflite_dir}/core/api/flatbuffer_conversions.cc"
    "${tflite_dir}/core/api/op_resolver.cc"
    "${tflite_dir}/core/api/tensor_utils.cc"
    "${tflite_dir}/kernels/internal/quantization_util.cc"
    "${tflite_dir}/schema/schema_utils.cc")

idf_component_register(
    SRCS "${lib_srcs}"
    INCLUDE_DIRS "." "third_party/gemmlowp"
                 "third_party/flatbuffers/include"
                 "third_party/ruy"
                 "third_party/kissfft"
    REQUIRES "esp-nn")

# Reduce the level of paranoia to be able to compile TF sources
target_compile_options(${COMPONENT_LIB} PRIVATE
    -Wno-maybe-uninitialized
    -Wno-missing-field-initializers
    -DESP_NN # enables ESP-NN optimizations by Espressif
    -Wno-type-limits)

target_compile_options(${COMPONENT_LIB} PRIVATE -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -Wno-nonnull)
target_compile_options(${COMPONENT_LIB} PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Werror -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -Wno-return-type -Wno-strict-aliasing -std=gnu++14 >)
target_compile_options(${COMPONENT_LIB} INTERFACE $<$<IN_LIST:-DTF_LITE_STATIC_MEMORY,$<TARGET_PROPERTY:${COMPONENT_LIB},COMPILE_OPTIONS>>:-DTF_LITE_STATIC_MEMORY>)
target_link_libraries(${COMPONENT_LIB} PRIVATE -lm)
tensorflow/lite/builtin_op_data.h (deleted):
@@ -1,22 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Compatibility shim for new location of interface definitions.

#ifndef TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
#define TENSORFLOW_LITE_BUILTIN_OP_DATA_H_

#include "tensorflow/lite/c/builtin_op_data.h"

#endif  // TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
tensorflow/lite/builtin_ops.h (deleted):
@@ -1,193 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_BUILTIN_OPS_H_
#define TENSORFLOW_LITE_BUILTIN_OPS_H_

// DO NOT EDIT MANUALLY: This file is automatically generated by
// `schema/builtin_ops_header/generator.cc`.

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// The enum for builtin operators.
// Note: CUSTOM, DELEGATE, and PLACEHOLDER_FOR_GREATER_OP_CODES are 3 special
// ops which are not real built-in ops.
typedef enum {
  kTfLiteBuiltinAdd = 0,
  kTfLiteBuiltinAveragePool2d = 1,
  kTfLiteBuiltinConcatenation = 2,
  kTfLiteBuiltinConv2d = 3,
  kTfLiteBuiltinDepthwiseConv2d = 4,
  kTfLiteBuiltinDepthToSpace = 5,
  kTfLiteBuiltinDequantize = 6,
  kTfLiteBuiltinEmbeddingLookup = 7,
  kTfLiteBuiltinFloor = 8,
  kTfLiteBuiltinFullyConnected = 9,
  kTfLiteBuiltinHashtableLookup = 10,
  kTfLiteBuiltinL2Normalization = 11,
  kTfLiteBuiltinL2Pool2d = 12,
  kTfLiteBuiltinLocalResponseNormalization = 13,
  kTfLiteBuiltinLogistic = 14,
  kTfLiteBuiltinLshProjection = 15,
  kTfLiteBuiltinLstm = 16,
  kTfLiteBuiltinMaxPool2d = 17,
  kTfLiteBuiltinMul = 18,
  kTfLiteBuiltinRelu = 19,
  kTfLiteBuiltinReluN1To1 = 20,
  kTfLiteBuiltinRelu6 = 21,
  kTfLiteBuiltinReshape = 22,
  kTfLiteBuiltinResizeBilinear = 23,
  kTfLiteBuiltinRnn = 24,
  kTfLiteBuiltinSoftmax = 25,
  kTfLiteBuiltinSpaceToDepth = 26,
  kTfLiteBuiltinSvdf = 27,
  kTfLiteBuiltinTanh = 28,
  kTfLiteBuiltinConcatEmbeddings = 29,
  kTfLiteBuiltinSkipGram = 30,
  kTfLiteBuiltinCall = 31,
  kTfLiteBuiltinCustom = 32,
  kTfLiteBuiltinEmbeddingLookupSparse = 33,
  kTfLiteBuiltinPad = 34,
  kTfLiteBuiltinUnidirectionalSequenceRnn = 35,
  kTfLiteBuiltinGather = 36,
  kTfLiteBuiltinBatchToSpaceNd = 37,
  kTfLiteBuiltinSpaceToBatchNd = 38,
  kTfLiteBuiltinTranspose = 39,
  kTfLiteBuiltinMean = 40,
  kTfLiteBuiltinSub = 41,
  kTfLiteBuiltinDiv = 42,
  kTfLiteBuiltinSqueeze = 43,
  kTfLiteBuiltinUnidirectionalSequenceLstm = 44,
  kTfLiteBuiltinStridedSlice = 45,
  kTfLiteBuiltinBidirectionalSequenceRnn = 46,
  kTfLiteBuiltinExp = 47,
  kTfLiteBuiltinTopkV2 = 48,
  kTfLiteBuiltinSplit = 49,
  kTfLiteBuiltinLogSoftmax = 50,
  kTfLiteBuiltinDelegate = 51,
  kTfLiteBuiltinBidirectionalSequenceLstm = 52,
  kTfLiteBuiltinCast = 53,
  kTfLiteBuiltinPrelu = 54,
  kTfLiteBuiltinMaximum = 55,
  kTfLiteBuiltinArgMax = 56,
  kTfLiteBuiltinMinimum = 57,
  kTfLiteBuiltinLess = 58,
  kTfLiteBuiltinNeg = 59,
  kTfLiteBuiltinPadv2 = 60,
  kTfLiteBuiltinGreater = 61,
  kTfLiteBuiltinGreaterEqual = 62,
  kTfLiteBuiltinLessEqual = 63,
  kTfLiteBuiltinSelect = 64,
  kTfLiteBuiltinSlice = 65,
  kTfLiteBuiltinSin = 66,
  kTfLiteBuiltinTransposeConv = 67,
  kTfLiteBuiltinSparseToDense = 68,
  kTfLiteBuiltinTile = 69,
  kTfLiteBuiltinExpandDims = 70,
  kTfLiteBuiltinEqual = 71,
  kTfLiteBuiltinNotEqual = 72,
  kTfLiteBuiltinLog = 73,
  kTfLiteBuiltinSum = 74,
  kTfLiteBuiltinSqrt = 75,
  kTfLiteBuiltinRsqrt = 76,
  kTfLiteBuiltinShape = 77,
  kTfLiteBuiltinPow = 78,
  kTfLiteBuiltinArgMin = 79,
  kTfLiteBuiltinFakeQuant = 80,
  kTfLiteBuiltinReduceProd = 81,
  kTfLiteBuiltinReduceMax = 82,
  kTfLiteBuiltinPack = 83,
  kTfLiteBuiltinLogicalOr = 84,
  kTfLiteBuiltinOneHot = 85,
  kTfLiteBuiltinLogicalAnd = 86,
  kTfLiteBuiltinLogicalNot = 87,
  kTfLiteBuiltinUnpack = 88,
  kTfLiteBuiltinReduceMin = 89,
  kTfLiteBuiltinFloorDiv = 90,
  kTfLiteBuiltinReduceAny = 91,
  kTfLiteBuiltinSquare = 92,
  kTfLiteBuiltinZerosLike = 93,
  kTfLiteBuiltinFill = 94,
  kTfLiteBuiltinFloorMod = 95,
  kTfLiteBuiltinRange = 96,
  kTfLiteBuiltinResizeNearestNeighbor = 97,
  kTfLiteBuiltinLeakyRelu = 98,
  kTfLiteBuiltinSquaredDifference = 99,
  kTfLiteBuiltinMirrorPad = 100,
  kTfLiteBuiltinAbs = 101,
  kTfLiteBuiltinSplitV = 102,
  kTfLiteBuiltinUnique = 103,
  kTfLiteBuiltinCeil = 104,
  kTfLiteBuiltinReverseV2 = 105,
  kTfLiteBuiltinAddN = 106,
  kTfLiteBuiltinGatherNd = 107,
  kTfLiteBuiltinCos = 108,
  kTfLiteBuiltinWhere = 109,
  kTfLiteBuiltinRank = 110,
  kTfLiteBuiltinElu = 111,
  kTfLiteBuiltinReverseSequence = 112,
  kTfLiteBuiltinMatrixDiag = 113,
  kTfLiteBuiltinQuantize = 114,
  kTfLiteBuiltinMatrixSetDiag = 115,
  kTfLiteBuiltinRound = 116,
  kTfLiteBuiltinHardSwish = 117,
  kTfLiteBuiltinIf = 118,
  kTfLiteBuiltinWhile = 119,
  kTfLiteBuiltinNonMaxSuppressionV4 = 120,
  kTfLiteBuiltinNonMaxSuppressionV5 = 121,
  kTfLiteBuiltinScatterNd = 122,
  kTfLiteBuiltinSelectV2 = 123,
  kTfLiteBuiltinDensify = 124,
  kTfLiteBuiltinSegmentSum = 125,
  kTfLiteBuiltinBatchMatmul = 126,
  kTfLiteBuiltinPlaceholderForGreaterOpCodes = 127,
  kTfLiteBuiltinCumsum = 128,
  kTfLiteBuiltinCallOnce = 129,
  kTfLiteBuiltinBroadcastTo = 130,
  kTfLiteBuiltinRfft2d = 131,
  kTfLiteBuiltinConv3d = 132,
  kTfLiteBuiltinImag = 133,
  kTfLiteBuiltinReal = 134,
  kTfLiteBuiltinComplexAbs = 135,
  kTfLiteBuiltinHashtable = 136,
  kTfLiteBuiltinHashtableFind = 137,
  kTfLiteBuiltinHashtableImport = 138,
  kTfLiteBuiltinHashtableSize = 139,
  kTfLiteBuiltinReduceAll = 140,
  kTfLiteBuiltinConv3dTranspose = 141,
  kTfLiteBuiltinVarHandle = 142,
  kTfLiteBuiltinReadVariable = 143,
  kTfLiteBuiltinAssignVariable = 144,
  kTfLiteBuiltinBroadcastArgs = 145,
  kTfLiteBuiltinRandomStandardNormal = 146,
  kTfLiteBuiltinBucketize = 147,
  kTfLiteBuiltinRandomUniform = 148,
  kTfLiteBuiltinMultinomial = 149,
  kTfLiteBuiltinGelu = 150,
  kTfLiteBuiltinDynamicUpdateSlice = 151,
  kTfLiteBuiltinRelu0To1 = 152,
  kTfLiteBuiltinUnsortedSegmentProd = 153,
  kTfLiteBuiltinUnsortedSegmentMax = 154,
  kTfLiteBuiltinUnsortedSegmentSum = 155,
  kTfLiteBuiltinAtan2 = 156,
  kTfLiteBuiltinUnsortedSegmentMin = 157,
} TfLiteBuiltinOperator;

#ifdef __cplusplus
}  // extern "C"
#endif  // __cplusplus
#endif  // TENSORFLOW_LITE_BUILTIN_OPS_H_
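For orientation, here is a minimal sketch (not part of the removed sources) of how code can branch on these enum values. The mapping from the kernels listed under REMOVE_ITEM in the CMakeLists.txt above to the op codes below is my assumption; in particular, pooling.cc is assumed to cover both pooling ops.

#include "tensorflow/lite/builtin_ops.h"

// Returns true if this op code is one of the kernels the component replaces
// with esp-nn optimized implementations (per the REMOVE_ITEM list above).
static bool UsesEspNnKernel(TfLiteBuiltinOperator op) {
  switch (op) {
    case kTfLiteBuiltinAdd:
    case kTfLiteBuiltinConv2d:
    case kTfLiteBuiltinDepthwiseConv2d:
    case kTfLiteBuiltinFullyConnected:
    case kTfLiteBuiltinMul:
    case kTfLiteBuiltinAveragePool2d:
    case kTfLiteBuiltinMaxPool2d:
    case kTfLiteBuiltinSoftmax:
      return true;
    default:
      return false;
  }
}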
tensorflow/lite/c/builtin_op_data.h (deleted):
@@ -1,525 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_

#include <stdint.h>

#include "tensorflow/lite/c/common.h"

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
// number of dimensions.
#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8

// TODO(aselle): Consider using "if this then that" for testing.

// Useful placeholder to put in otherwise empty structs to avoid size warnings.
typedef struct {
  char dummy;
} EmptyStructPlaceholder;

// IMPORTANT: All new members of structs must be added at the end to ensure
// backwards compatibility.

// Possible padding types (for convolutions)
typedef enum {
  kTfLitePaddingUnknown = 0,
  kTfLitePaddingSame,
  kTfLitePaddingValid,
} TfLitePadding;

typedef enum {
  kTfLiteMirrorPaddingUnknown = 0,
  kTfLiteMirrorPaddingReflect,
  kTfLiteMirrorPaddingSymmetric,
} TfLiteMirrorPaddingMode;

// TODO(b/130259536): We should move this out of builtin_op_data.
typedef struct {
  int width;
  int height;
  int width_offset;
  int height_offset;
} TfLitePaddingValues;

typedef struct {
  TfLiteMirrorPaddingMode mode;
} TfLiteMirrorPaddingParams;

// Possible fused activation functions.
typedef enum {
  kTfLiteActNone = 0,
  kTfLiteActRelu,
  kTfLiteActReluN1To1,  // min(max(-1, x), 1)
  kTfLiteActRelu6,      // min(max(0, x), 6)
  kTfLiteActTanh,
  kTfLiteActSignBit,
  kTfLiteActSigmoid,
} TfLiteFusedActivation;

typedef struct {
  // Parameters for CONV_2D version 1.
  TfLitePadding padding;
  int stride_width;
  int stride_height;
  TfLiteFusedActivation activation;

  // Parameters for CONV_2D version 2.
  // Note: Version 2 supports dilation values not equal to 1.
  int dilation_width_factor;
  int dilation_height_factor;
} TfLiteConvParams;

typedef struct {
  TfLitePadding padding;
  int stride_width;
  int stride_height;
  int stride_depth;
  int dilation_width_factor;
  int dilation_height_factor;
  int dilation_depth_factor;
  TfLiteFusedActivation activation;
} TfLiteConv3DParams;

typedef TfLiteConv3DParams TfLiteConv3DTransposeParams;

typedef struct {
  TfLitePadding padding;
  int stride_width;
  int stride_height;
  int filter_width;
  int filter_height;
  TfLiteFusedActivation activation;
  struct {
    TfLitePaddingValues padding;
  } computed;
} TfLitePoolParams;

typedef struct {
  // Parameters for DepthwiseConv version 1 or above.
  TfLitePadding padding;
  int stride_width;
  int stride_height;
  // `depth_multiplier` is redundant. It's used by CPU kernels in
  // TensorFlow 2.0 or below, but ignored in versions above.
  //
  // The information can be deduced from the shape of input and the shape of
  // weights. Since the TFLiteConverter toolchain doesn't support partially
  // specified shapes, relying on `depth_multiplier` stops us from supporting
  // graphs with dynamic shape tensors.
  //
  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
  // field.
  int depth_multiplier;
  TfLiteFusedActivation activation;
  // Parameters for DepthwiseConv version 2 or above.
  int dilation_width_factor;
  int dilation_height_factor;
} TfLiteDepthwiseConvParams;

typedef struct {
  int rank;
  TfLiteFusedActivation activation;

  // Parameter for SVDF version 4.
  bool asymmetric_quantize_inputs;
} TfLiteSVDFParams;

typedef struct {
  TfLiteFusedActivation activation;

  // Parameter for RNN version 3.
  bool asymmetric_quantize_inputs;
} TfLiteRNNParams;

typedef struct {
  bool time_major;
  TfLiteFusedActivation activation;

  // Parameter for Sequence RNN version 3.
  bool asymmetric_quantize_inputs;
} TfLiteSequenceRNNParams;

typedef struct {
  bool time_major;
  TfLiteFusedActivation activation;
  bool merge_outputs;

  // Parameter for Bidirectional RNN verison 3.
  bool asymmetric_quantize_inputs;
} TfLiteBidirectionalSequenceRNNParams;

typedef enum {
  kTfLiteFullyConnectedWeightsFormatDefault = 0,
  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
} TfLiteFullyConnectedWeightsFormat;

typedef struct {
  // Parameters for FullyConnected version 1 or above.
  TfLiteFusedActivation activation;

  // Parameters for FullyConnected version 2 or above.
  TfLiteFullyConnectedWeightsFormat weights_format;

  // Parameters for FullyConnected version 5 or above.
  // If set to true, then the number of dimensions in the input and the output
  // tensors are the same. Furthermore, all but the last dimension of the input
  // and output shapes will be equal.
  bool keep_num_dims;

  // Parameters for FullyConnected version 7 or above.
  // If set to true and the weights are quantized, then non constant inputs
  // are quantized at evaluation time with asymmetric quantization.
  bool asymmetric_quantize_inputs;
} TfLiteFullyConnectedParams;

typedef enum {
  kTfLiteLshProjectionUnknown = 0,
  kTfLiteLshProjectionSparse = 1,
  kTfLiteLshProjectionDense = 2,
} TfLiteLSHProjectionType;

typedef struct {
  TfLiteLSHProjectionType type;
} TfLiteLSHProjectionParams;

typedef struct {
  float beta;
} TfLiteSoftmaxParams;

typedef struct {
  int axis;
  TfLiteFusedActivation activation;
} TfLiteConcatenationParams;

typedef struct {
  TfLiteFusedActivation activation;
  // Parameter added for the version 4.
  bool pot_scale_int16;
} TfLiteAddParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteSpaceToBatchNDParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteBatchToSpaceNDParams;

typedef struct {
  bool adj_x;
  bool adj_y;
  // Parameters for BatchMatMul version 4 or above.
  // If set to true and the weights are quantized, then non constant inputs
  // are quantized at evaluation time with asymmetric quantization.
  bool asymmetric_quantize_inputs;
} TfLiteBatchMatMulParams;

typedef struct {
  TfLiteFusedActivation activation;
} TfLiteMulParams;

typedef struct {
  TfLiteFusedActivation activation;
  // Parameter added for the version 5.
  bool pot_scale_int16;
} TfLiteSubParams;

typedef struct {
  TfLiteFusedActivation activation;
} TfLiteDivParams;

typedef struct {
  TfLiteFusedActivation activation;
} TfLiteL2NormParams;

typedef struct {
  int radius;
  float bias;
  float alpha;
  float beta;
} TfLiteLocalResponseNormParams;

typedef enum {
  kTfLiteLSTMFullKernel = 0,
  kTfLiteLSTMBasicKernel
} TfLiteLSTMKernelType;

typedef struct {
  // Parameters for LSTM version 1.
  TfLiteFusedActivation activation;
  float cell_clip;
  float proj_clip;

  // Parameters for LSTM version 2.
  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
  TfLiteLSTMKernelType kernel_type;

  // Parameters for LSTM version 4.
  bool asymmetric_quantize_inputs;
} TfLiteLSTMParams;

typedef struct {
  // Parameters needed for the underlying LSTM.
  TfLiteFusedActivation activation;
  float cell_clip;
  float proj_clip;

  // If set to true then the first dimension is time, otherwise batch.
  bool time_major;

  // Parameter for unidirectional sequence RNN version 3.
  bool asymmetric_quantize_inputs;
} TfLiteUnidirectionalSequenceLSTMParams;

typedef struct {
  // Parameters supported by version 1:
  // Parameters inherited for the LSTM kernel.
  TfLiteFusedActivation activation;
  float cell_clip;
  float proj_clip;

  // If true, store the outputs of both directions in the first output.
  bool merge_outputs;

  // Parameters supported by version 2:
  // If set to true then the first dimension is time, otherwise batch.
  bool time_major;

  // Parameters supported by version 4:
  // If set to true, then hybrid ops use asymmetric quantization for inputs.
  bool asymmetric_quantize_inputs;
} TfLiteBidirectionalSequenceLSTMParams;

typedef struct {
  bool align_corners;
  // half_pixel_centers assumes pixels are of half the actual dimensions, and
  // yields more accurate resizes. Corresponds to the same argument for the
  // original TensorFlow op in TF2.0.
  bool half_pixel_centers;
} TfLiteResizeBilinearParams;

typedef struct {
  bool align_corners;
  bool half_pixel_centers;
} TfLiteResizeNearestNeighborParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLitePadParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLitePadV2Params;

typedef struct {
  // These fields are only used in old models for backward compatibility.
  // In the current implementation, we use the 2nd input of the op as the shape,
  // and these fields are unused.
  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
  int num_dimensions;
} TfLiteReshapeParams;

typedef struct {
  int ngram_size;
  int max_skip_size;
  bool include_all_ngrams;
} TfLiteSkipGramParams;

typedef struct {
  int block_size;
} TfLiteSpaceToDepthParams;

typedef struct {
  int block_size;
} TfLiteDepthToSpaceParams;

typedef struct {
  TfLiteType in_data_type;
  TfLiteType out_data_type;
} TfLiteCastParams;

typedef enum {
  kTfLiteCombinerTypeSum = 0,
  kTfLiteCombinerTypeMean = 1,
  kTfLiteCombinerTypeSqrtn = 2,
} TfLiteCombinerType;

typedef struct {
  TfLiteCombinerType combiner;
} TfLiteEmbeddingLookupSparseParams;

typedef struct {
  int axis;
  int batch_dims;
} TfLiteGatherParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteTransposeParams;

typedef struct {
  bool keep_dims;
} TfLiteReducerParams;

typedef struct {
  int num_splits;
} TfLiteSplitParams;

typedef struct {
  int num_splits;
} TfLiteSplitVParams;

typedef struct {
  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
  // For now we will fix the maximum possible number of dimensions.
  int squeeze_dims[8];
  int num_squeeze_dims;
} TfLiteSqueezeParams;

typedef struct {
  int begin_mask;
  int end_mask;
  int ellipsis_mask;
  int new_axis_mask;
  int shrink_axis_mask;
} TfLiteStridedSliceParams;

typedef struct {
  TfLiteType output_type;
} TfLiteArgMaxParams;

typedef struct {
  TfLiteType output_type;
} TfLiteArgMinParams;

typedef struct {
  TfLitePadding padding;
  int stride_width;
  int stride_height;
} TfLiteTransposeConvParams;

typedef struct {
  bool validate_indices;
} TfLiteSparseToDenseParams;

typedef struct {
  TfLiteType out_type;
} TfLiteShapeParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteRankParams;

typedef struct {
  // Parameters supported by version 1:
  float min;
  float max;
  int num_bits;

  // Parameters supported by version 2:
  bool narrow_range;
} TfLiteFakeQuantParams;

typedef struct {
  int values_count;
  int axis;
} TfLitePackParams;

typedef struct {
  int axis;
} TfLiteOneHotParams;

typedef struct {
  int num;
  int axis;
} TfLiteUnpackParams;

typedef struct {
  float alpha;
} TfLiteLeakyReluParams;

typedef struct {
  TfLiteType index_out_type;
} TfLiteUniqueParams;

typedef struct {
  int seq_dim;
  int batch_dim;
} TfLiteReverseSequenceParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteMatrixDiagParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteMatrixSetDiagParams;

typedef struct {
  int then_subgraph_index;
  int else_subgraph_index;
} TfLiteIfParams;

typedef struct {
  int cond_subgraph_index;
  int body_subgraph_index;
} TfLiteWhileParams;

typedef struct {
  bool exclusive;
  bool reverse;
} TfLiteCumsumParams;

typedef struct {
  int init_subgraph_index;
} TfLiteCallOnceParams;

typedef struct {
  int table_id;
  TfLiteType key_dtype;
  TfLiteType value_dtype;
} TfLiteHashtableParams;

typedef struct {
  const char* container;
  const char* shared_name;
} TfLiteVarHandleParams;

typedef struct {
  int seed;
  int seed2;
} TfLiteRandomParams;

typedef struct {
  int num_boundaries;
  // This points to the memory stored in the model (flatbuffer),
  // and is not owned.
  const float* boundaries;
} TfLiteBucketizeParams;

typedef struct {
  bool approximate;
} TfLiteGeluParams;

#ifdef __cplusplus
}  // extern "C"
#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
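As a minimal sketch (not part of the removed file), the snippet below fills in the CONV_2D builtin data by hand, for example when constructing a node outside the usual flatbuffer parsing path. The field names match the TfLiteConvParams struct above; the specific values are arbitrary illustration.

#include "tensorflow/lite/c/builtin_op_data.h"

// Builds a CONV_2D parameter block with stride 1 and no dilation.
TfLiteConvParams MakeExampleConvParams() {
  TfLiteConvParams params = {};
  params.padding = kTfLitePaddingSame;
  params.stride_width = 1;
  params.stride_height = 1;
  params.activation = kTfLiteActRelu6;
  // Version 2 fields; a factor of 1 means no dilation.
  params.dilation_width_factor = 1;
  params.dilation_height_factor = 1;
  return params;
}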
tensorflow/lite/c/c_api_types.h (deleted):
@@ -1,130 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This file declares types used by the pure C inference API defined in c_api.h,
// some of which are also used in the C++ and C kernel and interpreter APIs.

#ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_
#define TENSORFLOW_LITE_C_C_API_TYPES_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
// library.
#ifdef SWIG
#define TFL_CAPI_EXPORT
#elif defined(TFL_STATIC_LIBRARY_BUILD)
#define TFL_CAPI_EXPORT
#else  // not definded TFL_STATIC_LIBRARY_BUILD
#if defined(_WIN32)
#ifdef TFL_COMPILE_LIBRARY
#define TFL_CAPI_EXPORT __declspec(dllexport)
#else
#define TFL_CAPI_EXPORT __declspec(dllimport)
#endif  // TFL_COMPILE_LIBRARY
#else
#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
#endif  // _WIN32
#endif  // SWIG

// Note that new error status values may be added in future in order to
// indicate more fine-grained internal states, therefore, applications should
// not rely on status values being members of the enum.
typedef enum TfLiteStatus {
  kTfLiteOk = 0,

  // Generally referring to an error in the runtime (i.e. interpreter)
  kTfLiteError = 1,

  // Generally referring to an error from a TfLiteDelegate itself.
  kTfLiteDelegateError = 2,

  // Generally referring to an error in applying a delegate due to
  // incompatibility between runtime and delegate, e.g., this error is returned
  // when trying to apply a TF Lite delegate onto a model graph that's already
  // immutable.
  kTfLiteApplicationError = 3,

  // Generally referring to serialized delegate data not being found.
  // See tflite::delegates::Serialization.
  kTfLiteDelegateDataNotFound = 4,

  // Generally referring to data-writing issues in delegate serialization.
  // See tflite::delegates::Serialization.
  kTfLiteDelegateDataWriteError = 5,

  // Generally referring to data-reading issues in delegate serialization.
  // See tflite::delegates::Serialization.
  kTfLiteDelegateDataReadError = 6,

  // Generally referring to issues when the TF Lite model has ops that cannot be
  // resolved at runtime. This could happen when the specific op is not
  // registered or built with the TF Lite framework.
  kTfLiteUnresolvedOps = 7,
} TfLiteStatus;

// Types supported by tensor
typedef enum {
  kTfLiteNoType = 0,
  kTfLiteFloat32 = 1,
  kTfLiteInt32 = 2,
  kTfLiteUInt8 = 3,
  kTfLiteInt64 = 4,
  kTfLiteString = 5,
  kTfLiteBool = 6,
  kTfLiteInt16 = 7,
  kTfLiteComplex64 = 8,
  kTfLiteInt8 = 9,
  kTfLiteFloat16 = 10,
  kTfLiteFloat64 = 11,
  kTfLiteComplex128 = 12,
  kTfLiteUInt64 = 13,
  kTfLiteResource = 14,
  kTfLiteVariant = 15,
  kTfLiteUInt32 = 16,
  kTfLiteUInt16 = 17,
} TfLiteType;

// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
// If per-layer quantization is specified this field will still be populated in
// addition to TfLiteAffineQuantization.
// Parameters for asymmetric quantization. Quantized values can be converted
// back to float using:
//   real_value = scale * (quantized_value - zero_point)
typedef struct TfLiteQuantizationParams {
  float scale;
  int32_t zero_point;
} TfLiteQuantizationParams;

// --------------------------------------------------------------------------
// Opaque types used by c_api.h, c_api_opaque.h and common.h.

// TfLiteOpaqueContext is an opaque version of TfLiteContext;
typedef struct TfLiteOpaqueContext TfLiteOpaqueContext;

// TfLiteOpaqueNode is an opaque version of TfLiteNode;
typedef struct TfLiteOpaqueNode TfLiteOpaqueNode;

// TfLiteOpaqueTensor is an opaque version of TfLiteTensor;
typedef struct TfLiteOpaqueTensor TfLiteOpaqueTensor;

#ifdef __cplusplus
}  // extern C
#endif
#endif  // TENSORFLOW_LITE_C_C_API_TYPES_H_
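A quick illustration of the asymmetric quantization formula documented on TfLiteQuantizationParams above, real_value = scale * (quantized_value - zero_point): the sketch below (not part of the removed sources) dequantizes a single int8 value.

#include <cstdint>
#include "tensorflow/lite/c/c_api_types.h"

// Converts one quantized int8 value back to float using the scale and
// zero point carried by the tensor's quantization parameters.
float Dequantize(int8_t quantized_value, TfLiteQuantizationParams params) {
  return params.scale *
         (static_cast<int32_t>(quantized_value) - params.zero_point);
}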
tensorflow/lite/c/common.cc (deleted):
@@ -1,286 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/common.h"

#include "tensorflow/lite/c/c_api_types.h"
#ifdef TF_LITE_TENSORFLOW_PROFILER
#include "tensorflow/lite/tensorflow_profiler_logger.h"
#endif

#ifndef TF_LITE_STATIC_MEMORY
#include <stdlib.h>
#include <string.h>
#endif  // TF_LITE_STATIC_MEMORY

extern "C" {

size_t TfLiteIntArrayGetSizeInBytes(int size) {
  static TfLiteIntArray dummy;

  size_t computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size;
#if defined(_MSC_VER)
  // Context for why this is needed is in http://b/189926408#comment21
  computed_size -= sizeof(dummy.data[0]);
#endif
  return computed_size;
}

int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
  if (a == b) return 1;
  if (a == nullptr || b == nullptr) return 0;
  return TfLiteIntArrayEqualsArray(a, b->size, b->data);
}

int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
                              const int b_data[]) {
  if (a == nullptr) return (b_size == 0);
  if (a->size != b_size) return 0;
  int i = 0;
  for (; i < a->size; i++)
    if (a->data[i] != b_data[i]) return 0;
  return 1;
}

#ifndef TF_LITE_STATIC_MEMORY

TfLiteIntArray* TfLiteIntArrayCreate(int size) {
  size_t alloc_size = TfLiteIntArrayGetSizeInBytes(size);
  if (alloc_size <= 0) return nullptr;
  TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size);
  if (!ret) return ret;
  ret->size = size;
  return ret;
}

TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
  if (!src) return nullptr;
  TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
  if (ret) {
    memcpy(ret->data, src->data, src->size * sizeof(int));
  }
  return ret;
}

void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); }

#endif  // TF_LITE_STATIC_MEMORY

int TfLiteFloatArrayGetSizeInBytes(int size) {
  static TfLiteFloatArray dummy;

  int computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size;
#if defined(_MSC_VER)
  // Context for why this is needed is in http://b/189926408#comment21
  computed_size -= sizeof(dummy.data[0]);
#endif
  return computed_size;
}

#ifndef TF_LITE_STATIC_MEMORY

TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
  TfLiteFloatArray* ret =
      (TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size));
  ret->size = size;
  return ret;
}

void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }

void TfLiteTensorDataFree(TfLiteTensor* t) {
  if (t->allocation_type == kTfLiteDynamic ||
      t->allocation_type == kTfLitePersistentRo) {
    if (t->data.raw) {
#ifdef TF_LITE_TENSORFLOW_PROFILER
      tflite::OnTfLiteTensorDealloc(t);
#endif
      free(t->data.raw);
    }
  }
  t->data.raw = nullptr;
}

void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
  if (quantization->type == kTfLiteAffineQuantization) {
    TfLiteAffineQuantization* q_params =
        (TfLiteAffineQuantization*)(quantization->params);
    if (q_params->scale) {
      TfLiteFloatArrayFree(q_params->scale);
      q_params->scale = nullptr;
    }
    if (q_params->zero_point) {
      TfLiteIntArrayFree(q_params->zero_point);
      q_params->zero_point = nullptr;
    }
    free(q_params);
  }
  quantization->params = nullptr;
  quantization->type = kTfLiteNoQuantization;
}

void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
  if (sparsity == nullptr) {
    return;
  }

  if (sparsity->traversal_order) {
    TfLiteIntArrayFree(sparsity->traversal_order);
    sparsity->traversal_order = nullptr;
  }

  if (sparsity->block_map) {
    TfLiteIntArrayFree(sparsity->block_map);
    sparsity->block_map = nullptr;
  }

  if (sparsity->dim_metadata) {
    int i = 0;
    for (; i < sparsity->dim_metadata_size; i++) {
      TfLiteDimensionMetadata metadata = sparsity->dim_metadata[i];
      if (metadata.format == kTfLiteDimSparseCSR) {
        TfLiteIntArrayFree(metadata.array_segments);
        metadata.array_segments = nullptr;
        TfLiteIntArrayFree(metadata.array_indices);
        metadata.array_indices = nullptr;
      }
    }
    free(sparsity->dim_metadata);
    sparsity->dim_metadata = nullptr;
  }

  free(sparsity);
}

void TfLiteTensorFree(TfLiteTensor* t) {
  TfLiteTensorDataFree(t);
  if (t->dims) TfLiteIntArrayFree(t->dims);
  t->dims = nullptr;

  if (t->dims_signature) {
    TfLiteIntArrayFree((TfLiteIntArray*)t->dims_signature);
  }
  t->dims_signature = nullptr;

  TfLiteQuantizationFree(&t->quantization);
  TfLiteSparsityFree(t->sparsity);
  t->sparsity = nullptr;
}

void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
                       TfLiteQuantizationParams quantization, char* buffer,
                       size_t size, TfLiteAllocationType allocation_type,
                       const void* allocation, bool is_variable,
                       TfLiteTensor* tensor) {
  TfLiteTensorFree(tensor);
  tensor->type = type;
  tensor->name = name;
  tensor->dims = dims;
  tensor->params = quantization;
  tensor->data.raw = buffer;
  tensor->bytes = size;
  tensor->allocation_type = allocation_type;
  tensor->allocation = allocation;
  tensor->is_variable = is_variable;

  tensor->quantization.type = kTfLiteNoQuantization;
  tensor->quantization.params = nullptr;
}

TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) {
  if (!src || !dst) return kTfLiteOk;
  if (src->bytes != dst->bytes) return kTfLiteError;
  if (src == dst) return kTfLiteOk;

  dst->type = src->type;
  if (dst->dims) TfLiteIntArrayFree(dst->dims);
  dst->dims = TfLiteIntArrayCopy(src->dims);
  memcpy(dst->data.raw, src->data.raw, src->bytes);
  dst->buffer_handle = src->buffer_handle;
  dst->data_is_stale = src->data_is_stale;
  dst->delegate = src->delegate;

  return kTfLiteOk;
}

void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
  if (tensor->allocation_type != kTfLiteDynamic &&
      tensor->allocation_type != kTfLitePersistentRo) {
    return;
  }
  // TODO(b/145340303): Tensor data should be aligned.
  if (!tensor->data.raw) {
    tensor->data.raw = (char*)malloc(num_bytes);
#ifdef TF_LITE_TENSORFLOW_PROFILER
    tflite::OnTfLiteTensorAlloc(tensor, num_bytes);
#endif
  } else if (num_bytes > tensor->bytes) {
#ifdef TF_LITE_TENSORFLOW_PROFILER
    tflite::OnTfLiteTensorDealloc(tensor);
#endif
    tensor->data.raw = (char*)realloc(tensor->data.raw, num_bytes);
#ifdef TF_LITE_TENSORFLOW_PROFILER
    tflite::OnTfLiteTensorAlloc(tensor, num_bytes);
#endif
  }
  tensor->bytes = num_bytes;
}
#endif  // TF_LITE_STATIC_MEMORY

const char* TfLiteTypeGetName(TfLiteType type) {
  switch (type) {
    case kTfLiteNoType:
      return "NOTYPE";
    case kTfLiteFloat32:
      return "FLOAT32";
    case kTfLiteUInt16:
      return "UINT16";
    case kTfLiteInt16:
      return "INT16";
    case kTfLiteInt32:
      return "INT32";
    case kTfLiteUInt32:
      return "UINT32";
    case kTfLiteUInt8:
      return "UINT8";
    case kTfLiteInt8:
      return "INT8";
    case kTfLiteInt64:
      return "INT64";
    case kTfLiteUInt64:
      return "UINT64";
    case kTfLiteBool:
      return "BOOL";
    case kTfLiteComplex64:
      return "COMPLEX64";
    case kTfLiteComplex128:
      return "COMPLEX128";
    case kTfLiteString:
      return "STRING";
    case kTfLiteFloat16:
      return "FLOAT16";
    case kTfLiteFloat64:
      return "FLOAT64";
    case kTfLiteResource:
      return "RESOURCE";
    case kTfLiteVariant:
      return "VARIANT";
  }
  return "Unknown type";
}

TfLiteDelegate TfLiteDelegateCreate() { return TfLiteDelegate{}; }

}  // extern "C"
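A minimal usage sketch (not part of the removed file) for the TfLiteIntArray helpers above. Note that the Create/Copy/Free variants are only compiled when TF_LITE_STATIC_MEMORY is not defined; the component's build above does define -DTF_LITE_STATIC_MEMORY, so this is illustrative for host-side builds only, and the example dimensions are arbitrary.

#include "tensorflow/lite/c/common.h"

void IntArrayExample() {
  TfLiteIntArray* dims = TfLiteIntArrayCreate(2);  // heap-allocated, size 2
  dims->data[0] = 20;
  dims->data[1] = 32;
  TfLiteIntArray* copy = TfLiteIntArrayCopy(dims);
  int equal = TfLiteIntArrayEqual(dims, copy);  // 1: same size and contents
  (void)equal;
  TfLiteIntArrayFree(copy);
  TfLiteIntArrayFree(dims);
}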
File diff suppressed because it is too large.
tensorflow/lite/context_util.h (deleted):
@@ -1,51 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This provides a few C++ helpers that are useful for manipulating C structures
// in C++.
#ifndef TENSORFLOW_LITE_CONTEXT_UTIL_H_
#define TENSORFLOW_LITE_CONTEXT_UTIL_H_

#include <stddef.h>

#include "tensorflow/lite/c/common.h"

namespace tflite {

// Provide a range iterable wrapper for TfLiteIntArray* (C lists that TfLite
// C api uses. Can't use the google array_view, since we can't depend on even
// absl for embedded device reasons.
class TfLiteIntArrayView {
 public:
  // Construct a view of a TfLiteIntArray*. Note, `int_array` should be non-null
  // and this view does not take ownership of it.
  explicit TfLiteIntArrayView(const TfLiteIntArray* int_array)
      : int_array_(int_array) {}

  TfLiteIntArrayView(const TfLiteIntArrayView&) = default;
  TfLiteIntArrayView& operator=(const TfLiteIntArrayView& rhs) = default;

  typedef const int* const_iterator;
  const_iterator begin() const { return int_array_->data; }
  const_iterator end() const { return &int_array_->data[int_array_->size]; }
  size_t size() const { return end() - begin(); }
  int operator[](size_t pos) const { return int_array_->data[pos]; }

 private:
  const TfLiteIntArray* int_array_;
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_CONTEXT_UTIL_H_
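A minimal sketch (not part of the removed file) of the range-for iteration that TfLiteIntArrayView enables, e.g. over a node's input tensor indices. The function name and the use of a sum are illustrative only.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/context_util.h"

// Iterates a TfLiteIntArray* through the view's begin()/end() iterators.
int SumIndices(const TfLiteIntArray* indices) {
  int sum = 0;
  for (int index : tflite::TfLiteIntArrayView(indices)) {
    sum += index;
  }
  return sum;
}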
tensorflow/lite/core/api/error_reporter.cc (deleted):
@@ -1,38 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/core/api/error_reporter.h"
#include <cstdarg>

namespace tflite {

int ErrorReporter::Report(const char* format, ...) {
  va_list args;
  va_start(args, format);
  int code = Report(format, args);
  va_end(args);
  return code;
}

// TODO(aselle): Make the name of ReportError on context the same, so
// we can use the ensure functions w/o a context and w/ a reporter.
int ErrorReporter::ReportError(void*, const char* format, ...) {
  va_list args;
  va_start(args, format);
  int code = Report(format, args);
  va_end(args);
  return code;
}

}  // namespace tflite
tensorflow/lite/core/api/error_reporter.h (deleted):
@@ -1,59 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_

#include <cstdarg>

namespace tflite {

/// A functor that reports error to supporting system. Invoked similar to
/// printf.
///
/// Usage:
///   ErrorReporter foo;
///   foo.Report("test %d", 5);
/// or
///   va_list args;
///   foo.Report("test %d", args);  // where args is va_list
///
/// Subclass ErrorReporter to provide another reporting destination.
/// For example, if you have a GUI program, you might redirect to a buffer
/// that drives a GUI error log box.
class ErrorReporter {
 public:
  virtual ~ErrorReporter() {}
  virtual int Report(const char* format, va_list args) = 0;
  int Report(const char* format, ...);
  int ReportError(void*, const char* format, ...);
};

}  // namespace tflite

// You should not make bare calls to the error reporter, instead use the
// TF_LITE_REPORT_ERROR macro, since this allows message strings to be
// stripped when the binary size has to be optimized. If you are looking to
// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and
// every call will be stubbed out, taking no memory.
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_REPORT_ERROR(reporter, ...)                             \
  do {                                                                  \
    static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
  } while (false)
#else  // TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_REPORT_ERROR(reporter, ...)
#endif  // TF_LITE_STRIP_ERROR_STRINGS

#endif  // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
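The subclassing pattern that this header's comment describes can be sketched as follows (not part of the removed sources); the class name and the choice of stderr as the destination are illustrative, and the report goes through the recommended TF_LITE_REPORT_ERROR macro.

#include <cstdarg>
#include <cstdio>
#include "tensorflow/lite/core/api/error_reporter.h"

// Redirects error reports to stderr.
class StderrReporter : public tflite::ErrorReporter {
 public:
  int Report(const char* format, va_list args) override {
    return vfprintf(stderr, format, args);
  }
};

void ReportExample() {
  StderrReporter reporter;
  TF_LITE_REPORT_ERROR(&reporter, "tensor %d not allocated", 3);
}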
File diff suppressed because it is too large.
tensorflow/lite/core/api/flatbuffer_conversions.h (deleted):
@@ -1,408 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
#define TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_

// These functions transform codes and data structures that are defined in the
// flatbuffer serialization format into in-memory values that are used by the
// runtime API and interpreter.

#include <cstddef>
#include <new>
#include <type_traits>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

// Interface class for builtin data allocations.
class BuiltinDataAllocator {
 public:
  virtual void* Allocate(size_t size, size_t alignment_hint) = 0;
  virtual void Deallocate(void* data) = 0;

  // Allocate a structure, but make sure it is a POD structure that doesn't
  // require constructors to run. The reason we do this, is that Interpreter's C
  // extension part will take ownership so destructors will not be run during
  // deallocation.
  template <typename T>
  T* AllocatePOD() {
    // TODO(b/154346074): Change this to is_trivially_destructible when all
    // platform targets support that properly.
    static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
    void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
    return new (allocated_memory) T();
  }

  virtual ~BuiltinDataAllocator() {}
};

// Parse the appropriate data out of the op.
//
// This handles builtin data explicitly as there are flatbuffer schemas.
// If it returns kTfLiteOk, it passes the data out with `builtin_data`. The
// calling function has to pass in an allocator object, and this allocator
// will be called to reserve space for the output data. If the calling
// function's allocator reserves memory on the heap, then it's the calling
// function's responsibility to free it.
// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
                         ErrorReporter* error_reporter,
                         BuiltinDataAllocator* allocator, void** builtin_data);

// Converts the tensor data type used in the flat buffer to the representation
// used by the runtime.
TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
                               ErrorReporter* error_reporter);

TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
                      BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
                      BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseAddN(const Operator* op, ErrorReporter* error_reporter,
                       BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
                         BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
                         BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseAssignVariable(const Operator* op,
                                 ErrorReporter* error_reporter,
                                 BuiltinDataAllocator* allocator,
                                 void** builtin_data);

TfLiteStatus ParseBatchMatMul(const Operator* op, ErrorReporter* error_reporter,
                              BuiltinDataAllocator* allocator,
                              void** builtin_data);

TfLiteStatus ParseBatchToSpaceNd(const Operator* op,
                                 ErrorReporter* error_reporter,
                                 BuiltinDataAllocator* allocator,
                                 void** builtin_data);

TfLiteStatus ParseBroadcastArgs(const Operator* op,
                                ErrorReporter* error_reporter,
                                BuiltinDataAllocator* allocator,
                                void** builtin_data);

TfLiteStatus ParseBroadcastTo(const Operator* op, ErrorReporter* error_reporter,
                              BuiltinDataAllocator* allocator,
                              void** builtin_data);

TfLiteStatus ParseCallOnce(const Operator* op, ErrorReporter* error_reporter,
                           BuiltinDataAllocator* allocator,
                           void** builtin_data);
TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseCast(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseConcatenation(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseCumsum(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseDepthToSpace(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseDiv(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseElu(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseExp(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseExpandDims(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseFill(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseFloorDiv(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseFloorMod(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseFullyConnected(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseGather(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseGatherNd(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseGreaterEqual(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseL2Normalization(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLeakyRelu(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLogSoftmax(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseLSTM(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseMirrorPad(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParsePow(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseReadVariable(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseResizeBilinear(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSlice(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSpaceToBatchNd(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSpaceToDepth(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSplitV(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSqueeze(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSquaredDifference(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseStridedSlice(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseTranspose(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseTransposeConv(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseUnidirectionalSequenceLSTM(const Operator* op,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseVarHandle(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseWhile(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator, void** builtin_data);
|
|
||||||
|
|
||||||
TfLiteStatus ParseZerosLike(const Operator* op, ErrorReporter* error_reporter,
|
|
||||||
BuiltinDataAllocator* allocator,
|
|
||||||
void** builtin_data);
|
|
||||||
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
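ParseOpData and the Parse<Op> helpers above reserve space for the parsed builtin struct through the caller-supplied BuiltinDataAllocator. A minimal heap-backed allocator, shown here as a hedged sketch assuming the header above (on-device code hands in an arena-backed allocator instead), could look like this:

#include <cstdlib>

#include "tensorflow/lite/core/api/flatbuffer_conversions.h"

// Hypothetical malloc-backed allocator used only to illustrate the interface.
class MallocBuiltinDataAllocator : public tflite::BuiltinDataAllocator {
 public:
  void* Allocate(size_t size, size_t alignment_hint) override {
    (void)alignment_hint;  // malloc alignment suffices for the builtin structs
    return malloc(size);
  }
  void Deallocate(void* data) override { free(data); }
};

// Typical call site, where `op`, `builtin_code` and `error_reporter` come from
// the loaded flatbuffer model:
//   MallocBuiltinDataAllocator allocator;
//   void* builtin_data = nullptr;
//   TfLiteStatus status = tflite::ParseOpData(op, builtin_code, error_reporter,
//                                             &allocator, &builtin_data);
//   // Per the comment above, the caller is responsible for freeing
//   // builtin_data, since this allocator reserves it on the heap.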
@@ -1,68 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
|
|
||||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
|
||||||
|
|
||||||
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
|
||||||
#include "tensorflow/lite/c/common.h"
|
|
||||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
|
||||||
#include "tensorflow/lite/schema/schema_utils.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
TfLiteStatus GetRegistrationFromOpCode(
|
|
||||||
const OperatorCode* opcode, const OpResolver& op_resolver,
|
|
||||||
ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
|
|
||||||
TfLiteStatus status = kTfLiteOk;
|
|
||||||
*registration = nullptr;
|
|
||||||
auto builtin_code = GetBuiltinCode(opcode);
|
|
||||||
int version = opcode->version();
|
|
||||||
|
|
||||||
if (builtin_code > BuiltinOperator_MAX) {
|
|
||||||
TF_LITE_REPORT_ERROR(
|
|
||||||
error_reporter,
|
|
||||||
"Op builtin_code out of range: %d. Are you using old TFLite binary "
|
|
||||||
"with newer model?",
|
|
||||||
builtin_code);
|
|
||||||
status = kTfLiteError;
|
|
||||||
} else if (builtin_code != BuiltinOperator_CUSTOM) {
|
|
||||||
*registration = op_resolver.FindOp(builtin_code, version);
|
|
||||||
if (*registration == nullptr) {
|
|
||||||
TF_LITE_REPORT_ERROR(
|
|
||||||
error_reporter,
|
|
||||||
"Didn't find op for builtin opcode '%s' version '%d'. "
|
|
||||||
"An older version of this builtin might be supported. "
|
|
||||||
"Are you using an old TFLite binary with a newer model?\n",
|
|
||||||
EnumNameBuiltinOperator(builtin_code), version);
|
|
||||||
status = kTfLiteError;
|
|
||||||
}
|
|
||||||
} else if (!opcode->custom_code()) {
|
|
||||||
TF_LITE_REPORT_ERROR(
|
|
||||||
error_reporter,
|
|
||||||
"Operator with CUSTOM builtin_code has no custom_code.\n");
|
|
||||||
status = kTfLiteError;
|
|
||||||
} else {
|
|
||||||
const char* name = opcode->custom_code()->c_str();
|
|
||||||
*registration = op_resolver.FindOp(name, version);
|
|
||||||
if (*registration == nullptr) {
|
|
||||||
// Do not report error for unresolved custom op, we do the final check
|
|
||||||
// while preparing ops.
|
|
||||||
status = kTfLiteError;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace tflite
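For context, GetRegistrationFromOpCode is normally driven from the operator codes of a loaded flatbuffer model. The fragment below is a hedged sketch of that loop; `model`, `resolver` and `reporter` are assumed to exist in the caller and are not part of the removed file.

// Hedged sketch: `model` (const tflite::Model*), `resolver` (an OpResolver)
// and `reporter` (an ErrorReporter) are assumed to be set up by the caller.
for (flatbuffers::uoffset_t i = 0; i < model->operator_codes()->size(); ++i) {
  const tflite::OperatorCode* opcode = model->operator_codes()->Get(i);
  const TfLiteRegistration* registration = nullptr;
  if (tflite::GetRegistrationFromOpCode(opcode, resolver, &reporter,
                                        &registration) != kTfLiteOk) {
    // Builtin lookup failures were already reported via TF_LITE_REPORT_ERROR;
    // unresolved custom ops stay silent here and are checked again when the
    // ops are prepared, as the comment above notes.
  }
}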
@@ -1,140 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
|
|
||||||
#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
|
|
||||||
|
|
||||||
#include <functional>
|
|
||||||
#include <memory>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/c/common.h"
|
|
||||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
|
||||||
#include "tensorflow/lite/schema/schema_generated.h"
|
|
||||||
|
|
||||||
// Opaque type similar to TfLiteDelegate / TfLiteOpaqueDelegate.
|
|
||||||
// This is used for cases (e.g. when using "TF Lite with Google Play Services")
|
|
||||||
// where the TF Lite runtime might be built using a newer (or older)
|
|
||||||
// version of the TF Lite sources than the app, and hence might have a
|
|
||||||
// different definition of the TfLiteDelegate type. TF Lite APIs use
|
|
||||||
// TfLiteOpaqueDelegate rather than TfLiteDelegate when they want to
|
|
||||||
// refer to a delegate defined with that potentially different version
|
|
||||||
// of the TfLiteDelegate type.
|
|
||||||
struct TfLiteOpaqueDelegateStruct;
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
/// Abstract interface that returns TfLiteRegistrations given op codes or custom
|
|
||||||
/// op names. This is the mechanism that ops being referenced in the flatbuffer
|
|
||||||
/// model are mapped to executable function pointers (TfLiteRegistrations).
|
|
||||||
class OpResolver {
|
|
||||||
public:
|
|
||||||
/// Finds the op registration for a builtin operator by enum code.
|
|
||||||
virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
|
|
||||||
int version) const = 0;
|
|
||||||
/// Finds the op registration of a custom operator by op name.
|
|
||||||
virtual const TfLiteRegistration* FindOp(const char* op,
|
|
||||||
int version) const = 0;
|
|
||||||
|
|
||||||
// Represents a sequence of delegates.
|
|
||||||
using TfLiteDelegatePtrVector =
|
|
||||||
std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
|
|
||||||
|
|
||||||
// Returns optional delegates for resolving and handling ops in the flatbuffer
|
|
||||||
// model. This may be used in addition to the standard TfLiteRegistration
|
|
||||||
// lookup for graph resolution.
|
|
||||||
// WARNING: This API is deprecated, GetDelegateCreators is preferred.
|
|
||||||
virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Represents a function that creates a TfLite delegate instance.
|
|
||||||
using TfLiteDelegateCreator =
|
|
||||||
std::function<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
|
|
||||||
int /*num_threads*/)>;
|
|
||||||
|
|
||||||
// Represents a sequence of delegate creator functions.
|
|
||||||
using TfLiteDelegateCreators = std::vector<TfLiteDelegateCreator>;
|
|
||||||
|
|
||||||
// Returns a vector of delegate creators to create optional delegates for
|
|
||||||
// resolving and handling ops in the flatbuffer model. This may be used in
|
|
||||||
// addition to the standard TfLiteRegistration lookup for graph resolution.
|
|
||||||
//
|
|
||||||
// Note that this method is not used (will not be called) if you are using
|
|
||||||
// TF Lite in Google Play Services; the GetOpaqueDelegateCreators method
|
|
||||||
// (see below) is used for that case.
|
|
||||||
virtual TfLiteDelegateCreators GetDelegateCreators() const { return {}; }
|
|
||||||
|
|
||||||
// TODO(b/202712825): it would be nice if we could avoid the need for separate
|
|
||||||
// "opaque" types & methods for use only with TF Lite in Google Play Services.
|
|
||||||
|
|
||||||
// Represents an opaque delegate instance.
|
|
||||||
// WARNING: Experimental interface, subject to change.
|
|
||||||
using TfLiteOpaqueDelegatePtr =
|
|
||||||
std::unique_ptr<TfLiteOpaqueDelegateStruct,
|
|
||||||
void (*)(TfLiteOpaqueDelegateStruct*)>;
|
|
||||||
|
|
||||||
// Represents a function that creates an opaque delegate instance.
|
|
||||||
// WARNING: Experimental interface, subject to change.
|
|
||||||
using TfLiteOpaqueDelegateCreator =
|
|
||||||
std::function<TfLiteOpaqueDelegatePtr(int /*num_threads*/)>;
|
|
||||||
|
|
||||||
// Represents a sequence of opaque delegate creator functions.
|
|
||||||
// WARNING: Experimental interface, subject to change.
|
|
||||||
using TfLiteOpaqueDelegateCreators = std::vector<TfLiteOpaqueDelegateCreator>;
|
|
||||||
|
|
||||||
// Returns a vector of opaque delegate creators to create optional opaque
|
|
||||||
// delegates for resolving and handling ops in the flatbuffer model. This may
|
|
||||||
// be used in addition to the standard TfLiteRegistration lookup for graph
|
|
||||||
// resolution.
|
|
||||||
//
|
|
||||||
// Note that this method will be called only if you are using TF Lite in
|
|
||||||
// Google Play Services; if you are using regular TF Lite, GetDelegateCreators
|
|
||||||
// (see above) is used instead.
|
|
||||||
//
|
|
||||||
// WARNING: Experimental interface, subject to change.
|
|
||||||
virtual TfLiteOpaqueDelegateCreators GetOpaqueDelegateCreators() const {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual ~OpResolver() {}
|
|
||||||
|
|
||||||
private:
|
|
||||||
/// Returns true if this OpResolver may contain any "user defined" ops.
|
|
||||||
/// By "user defined" ops, we mean any op definitions other than those
|
|
||||||
/// contained in tflite::ops::builtin::BuiltinOpResolver.
|
|
||||||
///
|
|
||||||
/// If this method returns true, it doesn't necessarily mean that the
|
|
||||||
/// OpResolver contains a user-defined op, just that the absence of
|
|
||||||
/// user-defined ops can't be guaranteed.
|
|
||||||
///
|
|
||||||
/// Note that "user-defined" ops are not the same as "custom" ops;
|
|
||||||
/// BuiltinOpResolver may support certain "custom" ops, in addition to
|
|
||||||
/// "builtin" ops, and may not support all of the "builtin" op enum values.
|
|
||||||
virtual bool MayContainUserDefinedOps() const { return true; }
|
|
||||||
|
|
||||||
friend class OpResolverInternal;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Handles the logic for converting between an OperatorCode structure extracted
|
|
||||||
// from a flatbuffer and information about a registered operator
|
|
||||||
// implementation.
|
|
||||||
TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
|
|
||||||
const OpResolver& op_resolver,
|
|
||||||
ErrorReporter* error_reporter,
|
|
||||||
const TfLiteRegistration** registration);
|
|
||||||
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
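The two FindOp overloads above are the whole contract an OpResolver has to satisfy. As an illustration only (a sketch assuming the header above; real code would use BuiltinOpResolver or MicroMutableOpResolver), a resolver backed by a single injected registration might look like:

#include "tensorflow/lite/core/api/op_resolver.h"

// Illustrative resolver that knows exactly one builtin; the registration is
// supplied by the caller, so nothing here depends on a particular kernel.
class TinyOpResolver : public tflite::OpResolver {
 public:
  explicit TinyOpResolver(const TfLiteRegistration* conv_registration)
      : conv_registration_(conv_registration) {}

  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
                                   int version) const override {
    (void)version;
    return op == tflite::BuiltinOperator_CONV_2D ? conv_registration_ : nullptr;
  }

  const TfLiteRegistration* FindOp(const char* op, int version) const override {
    (void)op;
    (void)version;
    return nullptr;  // no custom ops in this sketch
  }

 private:
  const TfLiteRegistration* conv_registration_;
};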
@@ -1,50 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/core/api/tensor_utils.h"

#include <string.h>

#include "tensorflow/lite/c/common.h"

namespace tflite {

TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
  if (!tensor->is_variable) {
    return kTfLiteOk;
  }
  // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
  // to the value of the buffer.
  int value = 0;
  if (tensor->type == kTfLiteInt8) {
    value = tensor->params.zero_point;
  }
  // TODO(b/139446230): Provide a platform header to better handle these
  // specific scenarios.
#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
    defined(__i386) || defined(__x86__) || defined(__X86__) || \
    defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
  memset(tensor->data.raw, value, tensor->bytes);
#else
  char* raw_ptr = tensor->data.raw;
  for (size_t i = 0; i < tensor->bytes; ++i) {
    *raw_ptr = value;
    raw_ptr++;
  }
#endif
  return kTfLiteOk;
}

}  // namespace tflite
@@ -1,28 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_

#include "tensorflow/lite/c/common.h"

namespace tflite {

// Resets a variable tensor to the default value.
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
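A short usage note for the helper above (a hedged sketch; the surrounding tensor array is hypothetical): variable tensors are typically cleared between input sequences by calling ResetVariableTensor on every tensor flagged as a variable, which fills int8 tensors with their zero point and everything else with zeros, as implemented in tensor_utils.cc above.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/tensor_utils.h"

// Hypothetical helper: clear all variable tensors (e.g. LSTM state) before a
// new input sequence is fed through the interpreter.
void ResetAllVariableTensors(TfLiteTensor* tensors, int tensor_count) {
  for (int i = 0; i < tensor_count; ++i) {
    if (tensors[i].is_variable) {
      tflite::ResetVariableTensor(&tensors[i]);
    }
  }
}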
@@ -1,102 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
|
|
||||||
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static inline int CountLeadingZeros32Slow(uint64_t n) {
|
|
||||||
int zeroes = 28;
|
|
||||||
if (n >> 16) zeroes -= 16, n >>= 16;
|
|
||||||
if (n >> 8) zeroes -= 8, n >>= 8;
|
|
||||||
if (n >> 4) zeroes -= 4, n >>= 4;
|
|
||||||
return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int CountLeadingZeros32(uint32_t n) {
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
unsigned long result = 0; // NOLINT(runtime/int)
|
|
||||||
if (_BitScanReverse(&result, n)) {
|
|
||||||
return 31 - result;
|
|
||||||
}
|
|
||||||
return 32;
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
|
|
||||||
// Handle 0 as a special case because __builtin_clz(0) is undefined.
|
|
||||||
if (n == 0) {
|
|
||||||
return 32;
|
|
||||||
}
|
|
||||||
return __builtin_clz(n);
|
|
||||||
#else
|
|
||||||
return CountLeadingZeros32Slow(n);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int MostSignificantBit32(uint32_t n) {
|
|
||||||
return 32 - CountLeadingZeros32(n);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int CountLeadingZeros64Slow(uint64_t n) {
|
|
||||||
int zeroes = 60;
|
|
||||||
if (n >> 32) zeroes -= 32, n >>= 32;
|
|
||||||
if (n >> 16) zeroes -= 16, n >>= 16;
|
|
||||||
if (n >> 8) zeroes -= 8, n >>= 8;
|
|
||||||
if (n >> 4) zeroes -= 4, n >>= 4;
|
|
||||||
return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int CountLeadingZeros64(uint64_t n) {
|
|
||||||
#if defined(_MSC_VER) && defined(_M_X64)
|
|
||||||
// MSVC does not have __builtin_clzll. Use _BitScanReverse64.
|
|
||||||
unsigned long result = 0; // NOLINT(runtime/int)
|
|
||||||
if (_BitScanReverse64(&result, n)) {
|
|
||||||
return 63 - result;
|
|
||||||
}
|
|
||||||
return 64;
|
|
||||||
#elif defined(_MSC_VER)
|
|
||||||
// MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse
|
|
||||||
unsigned long result = 0; // NOLINT(runtime/int)
|
|
||||||
if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
|
|
||||||
return 31 - result;
|
|
||||||
}
|
|
||||||
if (_BitScanReverse(&result, n)) {
|
|
||||||
return 63 - result;
|
|
||||||
}
|
|
||||||
return 64;
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
|
|
||||||
// Handle 0 as a special case because __builtin_clzll(0) is undefined.
|
|
||||||
if (n == 0) {
|
|
||||||
return 64;
|
|
||||||
}
|
|
||||||
return __builtin_clzll(n);
|
|
||||||
#else
|
|
||||||
return CountLeadingZeros64Slow(n);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int MostSignificantBit64(uint64_t n) {
|
|
||||||
return 64 - CountLeadingZeros64(n);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} // extern "C"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
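A small worked example of the helpers above (a sketch assuming bits.h is on the include path): MostSignificantBit32 returns the 1-based index of the highest set bit, with 0 mapping to 0 because CountLeadingZeros32(0) is defined as 32.

#include <cassert>

#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"

int main() {
  // 0x13 = 0b10011: the highest set bit is bit 4 (0-based), so the result is 5.
  assert(MostSignificantBit32(0x13u) == 5);
  // CountLeadingZeros32(0) is defined to return 32, so this yields 0.
  assert(MostSignificantBit32(0u) == 0);
  assert(MostSignificantBit64(1ull << 40) == 41);
  return 0;
}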
@@ -1,52 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
|
|
||||||
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h"
|
|
||||||
|
|
||||||
void FftCompute(struct FftState* state, const int16_t* input,
|
|
||||||
int input_scale_shift) {
|
|
||||||
const size_t input_size = state->input_size;
|
|
||||||
const size_t fft_size = state->fft_size;
|
|
||||||
|
|
||||||
int16_t* fft_input = state->input;
|
|
||||||
// First, scale the input by the given shift.
|
|
||||||
size_t i;
|
|
||||||
for (i = 0; i < input_size; ++i) {
|
|
||||||
fft_input[i] = static_cast<int16_t>(static_cast<uint16_t>(input[i])
|
|
||||||
<< input_scale_shift);
|
|
||||||
}
|
|
||||||
// Zero out whatever else remains in the top part of the input.
|
|
||||||
for (; i < fft_size; ++i) {
|
|
||||||
fft_input[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply the FFT.
|
|
||||||
kissfft_fixed16::kiss_fftr(
|
|
||||||
reinterpret_cast<kissfft_fixed16::kiss_fftr_cfg>(state->scratch),
|
|
||||||
state->input,
|
|
||||||
reinterpret_cast<kissfft_fixed16::kiss_fft_cpx*>(state->output));
|
|
||||||
}
|
|
||||||
|
|
||||||
void FftInit(struct FftState* state) {
|
|
||||||
// All the initialization is done in FftPopulateState()
|
|
||||||
}
|
|
||||||
|
|
||||||
void FftReset(struct FftState* state) {
|
|
||||||
memset(state->input, 0, state->fft_size * sizeof(*state->input));
|
|
||||||
memset(state->output, 0, (state->fft_size / 2 + 1) * sizeof(*state->output));
|
|
||||||
}
|
|
||||||
@@ -1,50 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_

#include <stdint.h>
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

struct complex_int16_t {
  int16_t real;
  int16_t imag;
};

struct FftState {
  int16_t* input;
  struct complex_int16_t* output;
  size_t fft_size;
  size_t input_size;
  void* scratch;
  size_t scratch_size;
};

void FftCompute(struct FftState* state, const int16_t* input,
                int input_scale_shift);

void FftInit(struct FftState* state);

void FftReset(struct FftState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
@@ -1,70 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h"
|
|
||||||
|
|
||||||
int FftPopulateState(struct FftState* state, size_t input_size) {
|
|
||||||
state->input_size = input_size;
|
|
||||||
state->fft_size = 1;
|
|
||||||
while (state->fft_size < state->input_size) {
|
|
||||||
state->fft_size <<= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
state->input = reinterpret_cast<int16_t*>(
|
|
||||||
malloc(state->fft_size * sizeof(*state->input)));
|
|
||||||
if (state->input == nullptr) {
|
|
||||||
fprintf(stderr, "Failed to alloc fft input buffer\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
state->output = reinterpret_cast<complex_int16_t*>(
|
|
||||||
malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2));
|
|
||||||
if (state->output == nullptr) {
|
|
||||||
fprintf(stderr, "Failed to alloc fft output buffer\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ask kissfft how much memory it wants.
|
|
||||||
size_t scratch_size = 0;
|
|
||||||
kissfft_fixed16::kiss_fftr_cfg kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(
|
|
||||||
state->fft_size, 0, nullptr, &scratch_size);
|
|
||||||
if (kfft_cfg != nullptr) {
|
|
||||||
fprintf(stderr, "Kiss memory sizing failed.\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
state->scratch = malloc(scratch_size);
|
|
||||||
if (state->scratch == nullptr) {
|
|
||||||
fprintf(stderr, "Failed to alloc fft scratch buffer\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
state->scratch_size = scratch_size;
|
|
||||||
// Let kissfft configure the scratch space we just allocated
|
|
||||||
kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(state->fft_size, 0,
|
|
||||||
state->scratch, &scratch_size);
|
|
||||||
if (kfft_cfg != state->scratch) {
|
|
||||||
fprintf(stderr, "Kiss memory preallocation strategy failed.\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
void FftFreeStateContents(struct FftState* state) {
|
|
||||||
free(state->input);
|
|
||||||
free(state->output);
|
|
||||||
free(state->scratch);
|
|
||||||
}
|
|
||||||
@@ -1,34 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_

#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"

#ifdef __cplusplus
extern "C" {
#endif

// Prepares an FFT for the given input size.
int FftPopulateState(struct FftState* state, size_t input_size);

// Frees any allocated buffers.
void FftFreeStateContents(struct FftState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
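Taken together with fft.h, the lifecycle is populate, compute, free. The following sketch assumes the two headers above are available; the window size and silent input are synthetic values chosen only for illustration.

#include <cstdio>

#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"

int main() {
  struct FftState state;
  const size_t kInputSize = 400;  // e.g. a 25 ms window at 16 kHz (assumed)
  if (!FftPopulateState(&state, kInputSize)) {
    return 1;  // allocation or kissfft sizing failed; already logged to stderr
  }
  FftInit(&state);

  int16_t samples[400] = {0};  // synthetic silence
  FftCompute(&state, samples, /*input_scale_shift=*/0);
  // state.output now holds fft_size / 2 + 1 complex int16 bins.
  printf("fft_size = %zu\n", state.fft_size);

  FftFreeStateContents(&state);
  return 0;
}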
@@ -1,134 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
|
|
||||||
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
|
|
||||||
|
|
||||||
void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
|
|
||||||
struct complex_int16_t* fft_output,
|
|
||||||
int32_t* energy) {
|
|
||||||
const int end_index = state->end_index;
|
|
||||||
int i;
|
|
||||||
energy += state->start_index;
|
|
||||||
fft_output += state->start_index;
|
|
||||||
for (i = state->start_index; i < end_index; ++i) {
|
|
||||||
const int32_t real = fft_output->real;
|
|
||||||
const int32_t imag = fft_output->imag;
|
|
||||||
fft_output++;
|
|
||||||
const uint32_t mag_squared = (real * real) + (imag * imag);
|
|
||||||
*energy++ = mag_squared;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void FilterbankAccumulateChannels(struct FilterbankState* state,
|
|
||||||
const int32_t* energy) {
|
|
||||||
uint64_t* work = state->work;
|
|
||||||
uint64_t weight_accumulator = 0;
|
|
||||||
uint64_t unweight_accumulator = 0;
|
|
||||||
|
|
||||||
const int16_t* channel_frequency_starts = state->channel_frequency_starts;
|
|
||||||
const int16_t* channel_weight_starts = state->channel_weight_starts;
|
|
||||||
const int16_t* channel_widths = state->channel_widths;
|
|
||||||
|
|
||||||
int num_channels_plus_1 = state->num_channels + 1;
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < num_channels_plus_1; ++i) {
|
|
||||||
const int32_t* magnitudes = energy + *channel_frequency_starts++;
|
|
||||||
const int16_t* weights = state->weights + *channel_weight_starts;
|
|
||||||
const int16_t* unweights = state->unweights + *channel_weight_starts++;
|
|
||||||
const int width = *channel_widths++;
|
|
||||||
int j;
|
|
||||||
for (j = 0; j < width; ++j) {
|
|
||||||
weight_accumulator += *weights++ * ((uint64_t)*magnitudes);
|
|
||||||
unweight_accumulator += *unweights++ * ((uint64_t)*magnitudes);
|
|
||||||
++magnitudes;
|
|
||||||
}
|
|
||||||
*work++ = weight_accumulator;
|
|
||||||
weight_accumulator = unweight_accumulator;
|
|
||||||
unweight_accumulator = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint16_t Sqrt32(uint32_t num) {
|
|
||||||
if (num == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
uint32_t res = 0;
|
|
||||||
int max_bit_number = 32 - MostSignificantBit32(num);
|
|
||||||
max_bit_number |= 1;
|
|
||||||
uint32_t bit = 1U << (31 - max_bit_number);
|
|
||||||
int iterations = (31 - max_bit_number) / 2 + 1;
|
|
||||||
while (iterations--) {
|
|
||||||
if (num >= res + bit) {
|
|
||||||
num -= res + bit;
|
|
||||||
res = (res >> 1U) + bit;
|
|
||||||
} else {
|
|
||||||
res >>= 1U;
|
|
||||||
}
|
|
||||||
bit >>= 2U;
|
|
||||||
}
|
|
||||||
// Do rounding - if we have the bits.
|
|
||||||
if (num > res && res != 0xFFFF) {
|
|
||||||
++res;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint32_t Sqrt64(uint64_t num) {
|
|
||||||
// Take a shortcut and just use 32 bit operations if the upper word is all
|
|
||||||
// clear. This will cause a slight off by one issue for numbers close to 2^32,
|
|
||||||
// but it probably isn't going to matter (and gives us a big performance win).
|
|
||||||
if ((num >> 32) == 0) {
|
|
||||||
return Sqrt32((uint32_t)num);
|
|
||||||
}
|
|
||||||
uint64_t res = 0;
|
|
||||||
int max_bit_number = 64 - MostSignificantBit64(num);
|
|
||||||
max_bit_number |= 1;
|
|
||||||
uint64_t bit = 1ULL << (63 - max_bit_number);
|
|
||||||
int iterations = (63 - max_bit_number) / 2 + 1;
|
|
||||||
while (iterations--) {
|
|
||||||
if (num >= res + bit) {
|
|
||||||
num -= res + bit;
|
|
||||||
res = (res >> 1U) + bit;
|
|
||||||
} else {
|
|
||||||
res >>= 1U;
|
|
||||||
}
|
|
||||||
bit >>= 2U;
|
|
||||||
}
|
|
||||||
// Do rounding - if we have the bits.
|
|
||||||
if (num > res && res != 0xFFFFFFFFLL) {
|
|
||||||
++res;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) {
|
|
||||||
const int num_channels = state->num_channels;
|
|
||||||
const uint64_t* work = state->work + 1;
|
|
||||||
// Reuse the work buffer since we're fine clobbering it at this point to hold
|
|
||||||
// the output.
|
|
||||||
uint32_t* output = (uint32_t*)state->work;
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < num_channels; ++i) {
|
|
||||||
*output++ = Sqrt64(*work++) >> scale_down_shift;
|
|
||||||
}
|
|
||||||
return (uint32_t*)state->work;
|
|
||||||
}
|
|
||||||
|
|
||||||
void FilterbankReset(struct FilterbankState* state) {
|
|
||||||
memset(state->work, 0, (state->num_channels + 1) * sizeof(*state->work));
|
|
||||||
}
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
|
|
||||||
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
|
|
||||||
|
|
||||||
#define kFilterbankBits 12
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct FilterbankState {
|
|
||||||
int num_channels;
|
|
||||||
int start_index;
|
|
||||||
int end_index;
|
|
||||||
int16_t* channel_frequency_starts;
|
|
||||||
int16_t* channel_weight_starts;
|
|
||||||
int16_t* channel_widths;
|
|
||||||
int16_t* weights;
|
|
||||||
int16_t* unweights;
|
|
||||||
uint64_t* work;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Converts the relevant complex values of an FFT output into energy (the
|
|
||||||
// square magnitude).
|
|
||||||
void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
|
|
||||||
struct complex_int16_t* fft_output,
|
|
||||||
int32_t* energy);
|
|
||||||
|
|
||||||
// Computes the mel-scale filterbank on the given energy array. Output is cached
|
|
||||||
// internally - to fetch it, you need to call FilterbankSqrt.
|
|
||||||
void FilterbankAccumulateChannels(struct FilterbankState* state,
|
|
||||||
const int32_t* energy);
|
|
||||||
|
|
||||||
// Applies an integer square root to the 64 bit intermediate values of the
|
|
||||||
// filterbank, and returns a pointer to them. Memory will be invalidated the
|
|
||||||
// next time FilterbankAccumulateChannels is called.
|
|
||||||
uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift);
|
|
||||||
|
|
||||||
void FilterbankReset(struct FilterbankState* state);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} // extern "C"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
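The three calls declared above are chained once per frame. The helper below is a hedged sketch of that flow, assuming the state was set up successfully by FilterbankPopulateState from filterbank_util.h; it is not part of the removed sources.

#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"

// One frame of mel filterbank processing. `fb_state` must have been populated
// by FilterbankPopulateState(); `fft_output` and `energy` point at buffers
// sized for the FFT feeding this stage.
uint32_t* FilterbankFrame(struct FilterbankState* fb_state,
                          struct complex_int16_t* fft_output,
                          int32_t* energy, int scale_down_shift) {
  // Square magnitudes for the bins this filterbank actually uses.
  FilterbankConvertFftComplexToEnergy(fb_state, fft_output, energy);
  // Accumulate the weighted energies into the 64-bit work buffer.
  FilterbankAccumulateChannels(fb_state, energy);
  // Integer square root; the returned pointer aliases the work buffer and is
  // only valid until FilterbankAccumulateChannels is called again.
  return FilterbankSqrt(fb_state, scale_down_shift);
}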
@@ -1,220 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"

#include <assert.h>
#include <math.h>
#include <stdio.h>

#define kFilterbankIndexAlignment 4
#define kFilterbankChannelBlockSize 4

void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config) {
  config->num_channels = 32;
  config->lower_band_limit = 125.0f;
  config->upper_band_limit = 7500.0f;
  config->output_scale_shift = 7;
}

static float FreqToMel(float freq) { return 1127.0 * log1p(freq / 700.0); }

static void CalculateCenterFrequencies(const int num_channels,
                                       const float lower_frequency_limit,
                                       const float upper_frequency_limit,
                                       float* center_frequencies) {
  assert(lower_frequency_limit >= 0.0f);
  assert(upper_frequency_limit > lower_frequency_limit);

  const float mel_low = FreqToMel(lower_frequency_limit);
  const float mel_hi = FreqToMel(upper_frequency_limit);
  const float mel_span = mel_hi - mel_low;
  const float mel_spacing = mel_span / ((float)num_channels);
  int i;
  for (i = 0; i < num_channels; ++i) {
    center_frequencies[i] = mel_low + (mel_spacing * (i + 1));
  }
}

static void QuantizeFilterbankWeights(const float float_weight, int16_t* weight,
                                      int16_t* unweight) {
  *weight = floor(float_weight * (1 << kFilterbankBits) + 0.5);
  *unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5);
}

int FilterbankPopulateState(const struct FilterbankConfig* config,
                            struct FilterbankState* state, int sample_rate,
                            int spectrum_size) {
  state->num_channels = config->num_channels;
  const int num_channels_plus_1 = config->num_channels + 1;

  // How should we align things to index counts given the byte alignment?
  const int index_alignment =
      (kFilterbankIndexAlignment < sizeof(int16_t)
           ? 1
           : kFilterbankIndexAlignment / sizeof(int16_t));

  state->channel_frequency_starts =
      malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts));
  state->channel_weight_starts =
      malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts));
  state->channel_widths =
      malloc(num_channels_plus_1 * sizeof(*state->channel_widths));
  state->work = malloc(num_channels_plus_1 * sizeof(*state->work));

  float* center_mel_freqs =
      malloc(num_channels_plus_1 * sizeof(*center_mel_freqs));
  int16_t* actual_channel_starts =
      malloc(num_channels_plus_1 * sizeof(*actual_channel_starts));
  int16_t* actual_channel_widths =
      malloc(num_channels_plus_1 * sizeof(*actual_channel_widths));

  if (state->channel_frequency_starts == NULL ||
      state->channel_weight_starts == NULL || state->channel_widths == NULL ||
      center_mel_freqs == NULL || actual_channel_starts == NULL ||
      actual_channel_widths == NULL) {
    free(center_mel_freqs);
    free(actual_channel_starts);
    free(actual_channel_widths);
    fprintf(stderr, "Failed to allocate channel buffers\n");
    return 0;
  }

  CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit,
                             config->upper_band_limit, center_mel_freqs);

  // Always exclude DC.
  const float hz_per_sbin = 0.5 * sample_rate / ((float)spectrum_size - 1);
  state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin;
  state->end_index = 0;  // Initialized to zero here, but actually set below.

  // For each channel, we need to figure out what frequencies belong to it, and
  // how much padding we need to add so that we can efficiently multiply the
  // weights and unweights for accumulation. To simplify the multiplication
  // logic, all channels will have some multiplication to do (even if there are
  // no frequencies that accumulate to that channel) - they will be directed to
  // a set of zero weights.
  int chan_freq_index_start = state->start_index;
  int weight_index_start = 0;
  int needs_zeros = 0;

  int chan;
  for (chan = 0; chan < num_channels_plus_1; ++chan) {
    // Keep jumping frequencies until we overshoot the bound on this channel.
    int freq_index = chan_freq_index_start;
    while (FreqToMel((freq_index)*hz_per_sbin) <= center_mel_freqs[chan]) {
      ++freq_index;
    }

    const int width = freq_index - chan_freq_index_start;
    actual_channel_starts[chan] = chan_freq_index_start;
    actual_channel_widths[chan] = width;

    if (width == 0) {
      // This channel doesn't actually get anything from the frequencies, it's
      // always zero. We need then to insert some 'zero' weights into the
      // output, and just redirect this channel to do a single multiplication at
      // this point. For simplicity, the zeros are placed at the beginning of
      // the weights arrays, so we have to go and update all the other
      // weight_starts to reflect this shift (but only once).
      state->channel_frequency_starts[chan] = 0;
      state->channel_weight_starts[chan] = 0;
      state->channel_widths[chan] = kFilterbankChannelBlockSize;
      if (!needs_zeros) {
        needs_zeros = 1;
        int j;
        for (j = 0; j < chan; ++j) {
          state->channel_weight_starts[j] += kFilterbankChannelBlockSize;
        }
        weight_index_start += kFilterbankChannelBlockSize;
      }
    } else {
      // How far back do we need to go to ensure that we have the proper
      // alignment?
      const int aligned_start =
          (chan_freq_index_start / index_alignment) * index_alignment;
      const int aligned_width = (chan_freq_index_start - aligned_start + width);
      const int padded_width =
          (((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) *
          kFilterbankChannelBlockSize;

      state->channel_frequency_starts[chan] = aligned_start;
      state->channel_weight_starts[chan] = weight_index_start;
      state->channel_widths[chan] = padded_width;
      weight_index_start += padded_width;
    }
    chan_freq_index_start = freq_index;
  }

  // Allocate the two arrays to store the weights - weight_index_start contains
  // the index of what would be the next set of weights that we would need to
  // add, so that's how many weights we need to allocate.
  state->weights = calloc(weight_index_start, sizeof(*state->weights));
  state->unweights = calloc(weight_index_start, sizeof(*state->unweights));

  // If the alloc failed, we also need to nuke the arrays.
  if (state->weights == NULL || state->unweights == NULL) {
    free(center_mel_freqs);
    free(actual_channel_starts);
    free(actual_channel_widths);
    fprintf(stderr, "Failed to allocate weights or unweights\n");
    return 0;
  }

  // Next pass, compute all the weights. Since everything has been memset to
  // zero, we only need to fill in the weights that correspond to some frequency
  // for a channel.
  const float mel_low = FreqToMel(config->lower_band_limit);
  for (chan = 0; chan < num_channels_plus_1; ++chan) {
    int frequency = actual_channel_starts[chan];
    const int num_frequencies = actual_channel_widths[chan];
    const int frequency_offset =
        frequency - state->channel_frequency_starts[chan];
    const int weight_start = state->channel_weight_starts[chan];
    const float denom_val = (chan == 0) ? mel_low : center_mel_freqs[chan - 1];

    int j;
    for (j = 0; j < num_frequencies; ++j, ++frequency) {
      const float weight =
          (center_mel_freqs[chan] - FreqToMel(frequency * hz_per_sbin)) /
          (center_mel_freqs[chan] - denom_val);

      // Make the float into an integer for the weights (and unweights).
      const int weight_index = weight_start + frequency_offset + j;
      QuantizeFilterbankWeights(weight, state->weights + weight_index,
                                state->unweights + weight_index);
    }
    if (frequency > state->end_index) {
      state->end_index = frequency;
    }
  }

  free(center_mel_freqs);
  free(actual_channel_starts);
  free(actual_channel_widths);
  if (state->end_index >= spectrum_size) {
    fprintf(stderr, "Filterbank end_index is above spectrum size.\n");
    return 0;
  }
  return 1;
}

void FilterbankFreeStateContents(struct FilterbankState* state) {
  free(state->channel_frequency_starts);
  free(state->channel_weight_starts);
  free(state->channel_widths);
  free(state->weights);
  free(state->unweights);
  free(state->work);
}
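As a quick sanity check on the conversion above (illustrative, not part of the removed file): FreqToMel maps 1000 Hz to 1127 * ln(1 + 1000/700) ≈ 1000 mel, and QuantizeFilterbankWeights rounds each triangular weight w (and its complement 1 - w) to the nearest integer at a fixed-point scale of 2^kFilterbankBits, a constant defined in filterbank.h and therefore not visible in this hunk.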
@@ -1,50 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_

#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"

#ifdef __cplusplus
extern "C" {
#endif

struct FilterbankConfig {
  // number of frequency channel buckets for filterbank
  int num_channels;
  // maximum frequency to include
  float upper_band_limit;
  // minimum frequency to include
  float lower_band_limit;
  // unused
  int output_scale_shift;
};

// Fills the FilterbankConfig with "sane" defaults.
void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config);

// Allocates any buffers.
int FilterbankPopulateState(const struct FilterbankConfig* config,
                            struct FilterbankState* state, int sample_rate,
                            int spectrum_size);

// Frees any allocated buffers.
void FilterbankFreeStateContents(struct FilterbankState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
@@ -1,72 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"

#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"

struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
                                             const int16_t* samples,
                                             size_t num_samples,
                                             size_t* num_samples_read) {
  struct FrontendOutput output;
  output.values = NULL;
  output.size = 0;

  // Try to apply the window - if it fails, return and wait for more data.
  if (!WindowProcessSamples(&state->window, samples, num_samples,
                            num_samples_read)) {
    return output;
  }

  // Apply the FFT to the window's output (and scale it so that the fixed point
  // FFT can have as much resolution as possible).
  int input_shift =
      15 - MostSignificantBit32(state->window.max_abs_output_value);
  FftCompute(&state->fft, state->window.output, input_shift);

  // We can re-use the fft's output buffer to hold the energy.
  int32_t* energy = (int32_t*)state->fft.output;

  FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output,
                                      energy);

  FilterbankAccumulateChannels(&state->filterbank, energy);
  uint32_t* scaled_filterbank = FilterbankSqrt(&state->filterbank, input_shift);

  // Apply noise reduction.
  NoiseReductionApply(&state->noise_reduction, scaled_filterbank);

  if (state->pcan_gain_control.enable_pcan) {
    PcanGainControlApply(&state->pcan_gain_control, scaled_filterbank);
  }

  // Apply the log and scale.
  int correction_bits =
      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
  uint16_t* logged_filterbank =
      LogScaleApply(&state->log_scale, scaled_filterbank,
                    state->filterbank.num_channels, correction_bits);

  output.size = state->filterbank.num_channels;
  output.values = logged_filterbank;
  return output;
}

void FrontendReset(struct FrontendState* state) {
  WindowReset(&state->window);
  FftReset(&state->fft);
  FilterbankReset(&state->filterbank);
  NoiseReductionReset(&state->noise_reduction);
}
@@ -1,64 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_

#include <stdint.h>
#include <stdlib.h>

#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
#include "tensorflow/lite/experimental/microfrontend/lib/window.h"

#ifdef __cplusplus
extern "C" {
#endif

struct FrontendState {
  struct WindowState window;
  struct FftState fft;
  struct FilterbankState filterbank;
  struct NoiseReductionState noise_reduction;
  struct PcanGainControlState pcan_gain_control;
  struct LogScaleState log_scale;
};

struct FrontendOutput {
  const uint16_t* values;
  size_t size;
};

// Main entry point to processing frontend samples. Updates num_samples_read to
// contain the number of samples that have been consumed from the input array.
// Returns a struct containing the generated output. If not enough samples were
// added to generate a feature vector, the returned size will be 0 and the
// values pointer will be NULL. Note that the output pointer will be invalidated
// as soon as FrontendProcessSamples is called again, so copy the contents
// elsewhere if you need to use them later.
struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
                                             const int16_t* samples,
                                             size_t num_samples,
                                             size_t* num_samples_read);

void FrontendReset(struct FrontendState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
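For context, a minimal sketch of how this frontend API is typically driven. Only the struct and function names come from the headers in this commit; the 16 kHz sample rate, the buffer handling, and the printing are assumptions made for illustration.

/* Illustrative usage sketch only - not part of the removed sources. */
#include <stdio.h>

#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"

int ExtractFeatures(const int16_t* audio, size_t num_samples) {
  struct FrontendConfig config;
  struct FrontendState state;
  FrontendFillConfigWithDefaults(&config);
  if (!FrontendPopulateState(&config, &state, 16000 /* assumed Hz */)) {
    return 0;
  }
  while (num_samples > 0) {
    size_t num_read = 0;
    struct FrontendOutput output =
        FrontendProcessSamples(&state, audio, num_samples, &num_read);
    audio += num_read;
    num_samples -= num_read;
    if (output.values != NULL) {
      /* output.size filterbank channels, valid only until the next call. */
      size_t i;
      for (i = 0; i < output.size; ++i) {
        printf("%u ", (unsigned)output.values[i]);
      }
      printf("\n");
    }
  }
  FrontendFreeStateContents(&state);
  return 1;
}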
@@ -1,85 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"

#include <stdio.h>
#include <string.h>

#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"

void FrontendFillConfigWithDefaults(struct FrontendConfig* config) {
  WindowFillConfigWithDefaults(&config->window);
  FilterbankFillConfigWithDefaults(&config->filterbank);
  NoiseReductionFillConfigWithDefaults(&config->noise_reduction);
  PcanGainControlFillConfigWithDefaults(&config->pcan_gain_control);
  LogScaleFillConfigWithDefaults(&config->log_scale);
}

int FrontendPopulateState(const struct FrontendConfig* config,
                          struct FrontendState* state, int sample_rate) {
  memset(state, 0, sizeof(*state));

  if (!WindowPopulateState(&config->window, &state->window, sample_rate)) {
    fprintf(stderr, "Failed to populate window state\n");
    return 0;
  }

  if (!FftPopulateState(&state->fft, state->window.size)) {
    fprintf(stderr, "Failed to populate fft state\n");
    return 0;
  }
  FftInit(&state->fft);

  if (!FilterbankPopulateState(&config->filterbank, &state->filterbank,
                               sample_rate, state->fft.fft_size / 2 + 1)) {
    fprintf(stderr, "Failed to populate filterbank state\n");
    return 0;
  }

  if (!NoiseReductionPopulateState(&config->noise_reduction,
                                   &state->noise_reduction,
                                   state->filterbank.num_channels)) {
    fprintf(stderr, "Failed to populate noise reduction state\n");
    return 0;
  }

  int input_correction_bits =
      MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
  if (!PcanGainControlPopulateState(
          &config->pcan_gain_control, &state->pcan_gain_control,
          state->noise_reduction.estimate, state->filterbank.num_channels,
          state->noise_reduction.smoothing_bits, input_correction_bits)) {
    fprintf(stderr, "Failed to populate pcan gain control state\n");
    return 0;
  }

  if (!LogScalePopulateState(&config->log_scale, &state->log_scale)) {
    fprintf(stderr, "Failed to populate log scale state\n");
    return 0;
  }

  FrontendReset(state);

  // All good, return a true value.
  return 1;
}

void FrontendFreeStateContents(struct FrontendState* state) {
  WindowFreeStateContents(&state->window);
  FftFreeStateContents(&state->fft);
  FilterbankFreeStateContents(&state->filterbank);
  NoiseReductionFreeStateContents(&state->noise_reduction);
  PcanGainControlFreeStateContents(&state->pcan_gain_control);
}
@@ -1,52 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_

#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"

#ifdef __cplusplus
extern "C" {
#endif

struct FrontendConfig {
  struct WindowConfig window;
  struct FilterbankConfig filterbank;
  struct NoiseReductionConfig noise_reduction;
  struct PcanGainControlConfig pcan_gain_control;
  struct LogScaleConfig log_scale;
};

// Fills the FrontendConfig with "sane" defaults.
void FrontendFillConfigWithDefaults(struct FrontendConfig* config);

// Allocates any buffers.
int FrontendPopulateState(const struct FrontendConfig* config,
                          struct FrontendState* state, int sample_rate);

// Frees any allocated buffers.
void FrontendFreeStateContents(struct FrontendState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
@@ -1,48 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_

// This header file should be included in all variants of kiss_fft_$type.{h,cc}
// so that their sub-included source files do not mistakenly wrap libc header
// files within their kissfft_$type namespaces.
// E.g., this header avoids kiss_fft_int16.h containing:
//   namespace kiss_fft_int16 {
//   #include "kiss_fft.h"
//   }
// where kiss_fft.h contains:
//   #include <math.h>
//
// TRICK: By including the following header files here, their preprocessor
// header guards prevent them being re-defined inside of the kiss_fft_$type
// namespaces declared within the kiss_fft_$type.{h,cc} sources.
// Note that the original kiss_fft*.h files are untouched since they
// may be used in libraries that include them directly.

#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef FIXED_POINT
#include <sys/types.h>
#endif

#ifdef USE_SIMD
#include <xmmintrin.h>
#endif
#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
@@ -1,8 +0,0 @@
#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h"

#define FIXED_POINT 16
namespace kissfft_fixed16 {
#include "kiss_fft.c"
#include "tools/kiss_fftr.c"
}  // namespace kissfft_fixed16
#undef FIXED_POINT
@@ -1,33 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_

#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h"

// Wrap 16-bit kiss fft in its own namespace. Enables us to link an application
// with different kiss fft resolutions (16/32 bit integer, float, double)
// without getting a linker error.
#define FIXED_POINT 16
namespace kissfft_fixed16 {
#include "kiss_fft.h"
#include "tools/kiss_fftr.h"
}  // namespace kissfft_fixed16
#undef FIXED_POINT
#undef kiss_fft_scalar
#undef KISS_FFT_H

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_
@@ -1,30 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
const uint16_t kLogLut[]
#ifndef _MSC_VER
    __attribute__((aligned(4)))
#endif  // _MSC_VER
    = {0,    224,  442,  654,  861,  1063, 1259, 1450, 1636, 1817, 1992, 2163,
       2329, 2490, 2646, 2797, 2944, 3087, 3224, 3358, 3487, 3611, 3732, 3848,
       3960, 4068, 4172, 4272, 4368, 4460, 4549, 4633, 4714, 4791, 4864, 4934,
       5001, 5063, 5123, 5178, 5231, 5280, 5326, 5368, 5408, 5444, 5477, 5507,
       5533, 5557, 5578, 5595, 5610, 5622, 5631, 5637, 5640, 5641, 5638, 5633,
       5626, 5615, 5602, 5586, 5568, 5547, 5524, 5498, 5470, 5439, 5406, 5370,
       5332, 5291, 5249, 5203, 5156, 5106, 5054, 5000, 4944, 4885, 4825, 4762,
       4697, 4630, 4561, 4490, 4416, 4341, 4264, 4184, 4103, 4020, 3935, 3848,
       3759, 3668, 3575, 3481, 3384, 3286, 3186, 3084, 2981, 2875, 2768, 2659,
       2549, 2437, 2323, 2207, 2090, 1971, 1851, 1729, 1605, 1480, 1353, 1224,
       1094, 963,  830,  695,  559,  421,  282,  142,  0,    0};
@@ -1,40 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Number of segments in the log lookup table. The table will be kLogSegments+1
// in length (with some padding).
#define kLogSegments 128
#define kLogSegmentsLog2 7

// Scale used by lookup table.
#define kLogScale 65536
#define kLogScaleLog2 16
#define kLogCoeff 45426

extern const uint16_t kLogLut[];

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
@@ -1,83 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"

#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"

#define kuint16max 0x0000FFFF

// The following functions implement integer logarithms of various sizes. The
// approximation is calculated according to method described in
//       www.inti.gob.ar/electronicaeinformatica/instrumentacion/utic/
//       publicaciones/SPL2007/Log10-spl07.pdf
// It first calculates log2 of the input and then converts it to natural
// logarithm.

static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) {
  // Part 1
  int32_t frac = x - (1LL << log2x);
  if (log2x < kLogScaleLog2) {
    frac <<= kLogScaleLog2 - log2x;
  } else {
    frac >>= log2x - kLogScaleLog2;
  }
  // Part 2
  const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2);
  const uint32_t seg_unit =
      (((uint32_t)1) << kLogScaleLog2) >> kLogSegmentsLog2;

  const int32_t c0 = kLogLut[base_seg];
  const int32_t c1 = kLogLut[base_seg + 1];
  const int32_t seg_base = seg_unit * base_seg;
  const int32_t rel_pos = ((c1 - c0) * (frac - seg_base)) >> kLogScaleLog2;
  return frac + c0 + rel_pos;
}

static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
  const uint32_t integer = MostSignificantBit32(x) - 1;
  const uint32_t fraction = Log2FractionPart(x, integer);
  const uint32_t log2 = (integer << kLogScaleLog2) + fraction;
  const uint32_t round = kLogScale / 2;
  const uint32_t loge = (((uint64_t)kLogCoeff) * log2 + round) >> kLogScaleLog2;
  // Finally scale to our output scale
  const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2;
  return loge_scaled;
}

uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
                        int signal_size, int correction_bits) {
  const int scale_shift = state->scale_shift;
  uint16_t* output = (uint16_t*)signal;
  uint16_t* ret = output;
  int i;
  for (i = 0; i < signal_size; ++i) {
    uint32_t value = *signal++;
    if (state->enable_log) {
      if (correction_bits < 0) {
        value >>= -correction_bits;
      } else {
        value <<= correction_bits;
      }
      if (value > 1) {
        value = Log(value, scale_shift);
      } else {
        value = 0;
      }
    }
    *output++ = (value < kuint16max) ? value : kuint16max;
  }
  return ret;
}
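A cross-check on the constants above (illustrative, not part of the removed file): ln(x) = ln(2) * log2(x), and kLogCoeff / kLogScale = 45426 / 65536 ≈ 0.69315 ≈ ln(2). Log() therefore first builds log2(x) in Q16 (integer part from MostSignificantBit32, fractional part from the kLogLut interpolation in Log2FractionPart) and then multiplies by kLogCoeff to convert it to a natural logarithm before the final scale_shift.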
@@ -1,39 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_

#include <stdint.h>
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

struct LogScaleState {
  int enable_log;
  int scale_shift;
};

// Applies a fixed point logarithm to the signal and converts it to 16 bit. Note
// that the signal array will be modified.
uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
                        int signal_size, int correction_bits);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
@@ -1,27 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"

void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config) {
  config->enable_log = 1;
  config->scale_shift = 6;
}

int LogScalePopulateState(const struct LogScaleConfig* config,
                          struct LogScaleState* state) {
  state->enable_log = config->enable_log;
  state->scale_shift = config->scale_shift;
  return 1;
}
@@ -1,45 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_

#include <stdint.h>
#include <stdlib.h>

#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"

#ifdef __cplusplus
extern "C" {
#endif

struct LogScaleConfig {
  // set to false (0) to disable this module
  int enable_log;
  // scale results by 2^(scale_shift)
  int scale_shift;
};

// Populates the LogScaleConfig with "sane" default values.
void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config);

// Allocates any buffers.
int LogScalePopulateState(const struct LogScaleConfig* config,
                          struct LogScaleState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
@@ -1,51 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"

#include <string.h>

void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
  int i;
  for (i = 0; i < state->num_channels; ++i) {
    const uint32_t smoothing =
        ((i & 1) == 0) ? state->even_smoothing : state->odd_smoothing;
    const uint32_t one_minus_smoothing = (1 << kNoiseReductionBits) - smoothing;

    // Update the estimate of the noise.
    const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits;
    uint32_t estimate =
        (((uint64_t)signal_scaled_up * smoothing) +
         ((uint64_t)state->estimate[i] * one_minus_smoothing)) >>
        kNoiseReductionBits;
    state->estimate[i] = estimate;

    // Make sure that we can't get a negative value for the signal - estimate.
    if (estimate > signal_scaled_up) {
      estimate = signal_scaled_up;
    }

    const uint32_t floor =
        ((uint64_t)signal[i] * state->min_signal_remaining) >>
        kNoiseReductionBits;
    const uint32_t subtracted =
        (signal_scaled_up - estimate) >> state->smoothing_bits;
    const uint32_t output = subtracted > floor ? subtracted : floor;
    signal[i] = output;
  }
}

void NoiseReductionReset(struct NoiseReductionState* state) {
  memset(state->estimate, 0, sizeof(*state->estimate) * state->num_channels);
}
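In plain terms (a summary, not part of the removed file): each channel keeps a running noise estimate e, updated as e = alpha * s + (1 - alpha) * e with alpha = even_smoothing or odd_smoothing in Q14 (kNoiseReductionBits = 14), and the channel output is max(s - e, s * min_signal_remaining), i.e. spectral subtraction with a per-channel floor that preserves a fraction of the original signal.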
@@ -1,46 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_

#define kNoiseReductionBits 14

#include <stdint.h>
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

struct NoiseReductionState {
  int smoothing_bits;
  uint16_t even_smoothing;
  uint16_t odd_smoothing;
  uint16_t min_signal_remaining;
  int num_channels;
  uint32_t* estimate;
};

// Removes stationary noise from each channel of the signal using a low pass
// filter.
void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal);

void NoiseReductionReset(struct NoiseReductionState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
@@ -1,45 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"

#include <stdio.h>

void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config) {
  config->smoothing_bits = 10;
  config->even_smoothing = 0.025;
  config->odd_smoothing = 0.06;
  config->min_signal_remaining = 0.05;
}

int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
                                struct NoiseReductionState* state,
                                int num_channels) {
  state->smoothing_bits = config->smoothing_bits;
  state->odd_smoothing = config->odd_smoothing * (1 << kNoiseReductionBits);
  state->even_smoothing = config->even_smoothing * (1 << kNoiseReductionBits);
  state->min_signal_remaining =
      config->min_signal_remaining * (1 << kNoiseReductionBits);
  state->num_channels = num_channels;
  state->estimate = calloc(state->num_channels, sizeof(*state->estimate));
  if (state->estimate == NULL) {
    fprintf(stderr, "Failed to alloc estimate buffer\n");
    return 0;
  }
  return 1;
}

void NoiseReductionFreeStateContents(struct NoiseReductionState* state) {
  free(state->estimate);
}
@@ -1,50 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_

#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"

#ifdef __cplusplus
extern "C" {
#endif

struct NoiseReductionConfig {
  // scale the signal up by 2^(smoothing_bits) before reduction
  int smoothing_bits;
  // smoothing coefficient for even-numbered channels
  float even_smoothing;
  // smoothing coefficient for odd-numbered channels
  float odd_smoothing;
  // fraction of signal to preserve (1.0 disables this module)
  float min_signal_remaining;
};

// Populates the NoiseReductionConfig with "sane" default values.
void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config);

// Allocates any buffers.
int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
                                struct NoiseReductionState* state,
                                int num_channels);

// Frees any allocated buffers.
void NoiseReductionFreeStateContents(struct NoiseReductionState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
@@ -1,56 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"

#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"

int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) {
  if (x <= 2) {
    return lut[x];
  }

  const int16_t interval = MostSignificantBit32(x);
  lut += 4 * interval - 6;

  const int16_t frac =
      ((interval < 11) ? (x << (11 - interval)) : (x >> (interval - 11))) &
      0x3FF;

  int32_t result = ((int32_t)lut[2] * frac) >> 5;
  result += (int32_t)((uint32_t)lut[1] << 5);
  result *= frac;
  result = (result + (1 << 14)) >> 15;
  result += lut[0];
  return (int16_t)result;
}

uint32_t PcanShrink(const uint32_t x) {
  if (x < (2 << kPcanSnrBits)) {
    return (x * x) >> (2 + 2 * kPcanSnrBits - kPcanOutputBits);
  } else {
    return (x >> (kPcanSnrBits - kPcanOutputBits)) - (1 << kPcanOutputBits);
  }
}

void PcanGainControlApply(struct PcanGainControlState* state,
                          uint32_t* signal) {
  int i;
  for (i = 0; i < state->num_channels; ++i) {
    const uint32_t gain =
        WideDynamicFunction(state->noise_estimate[i], state->gain_lut);
    const uint32_t snr = ((uint64_t)signal[i] * gain) >> state->snr_shift;
    signal[i] = PcanShrink(snr);
  }
}
@@ -1,47 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_

#include <stdint.h>
#include <stdlib.h>

#define kPcanSnrBits 12
#define kPcanOutputBits 6

#ifdef __cplusplus
extern "C" {
#endif

// Details at https://research.google/pubs/pub45911.pdf
struct PcanGainControlState {
  int enable_pcan;
  uint32_t* noise_estimate;
  int num_channels;
  int16_t* gain_lut;
  int32_t snr_shift;
};

int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut);

uint32_t PcanShrink(const uint32_t x);

void PcanGainControlApply(struct PcanGainControlState* state, uint32_t* signal);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
@@ -1,92 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"

#include <math.h>
#include <stdio.h>

#define kint16max 0x00007FFF

void PcanGainControlFillConfigWithDefaults(
    struct PcanGainControlConfig* config) {
  config->enable_pcan = 0;
  config->strength = 0.95;
  config->offset = 80.0;
  config->gain_bits = 21;
}

int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
                               int32_t input_bits, uint32_t x) {
  const float x_as_float = ((float)x) / ((uint32_t)1 << input_bits);
  const float gain_as_float =
      ((uint32_t)1 << config->gain_bits) *
      powf(x_as_float + config->offset, -config->strength);

  if (gain_as_float > kint16max) {
    return kint16max;
  }
  return (int16_t)(gain_as_float + 0.5f);
}

int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
                                 struct PcanGainControlState* state,
                                 uint32_t* noise_estimate,
                                 const int num_channels,
                                 const uint16_t smoothing_bits,
                                 const int32_t input_correction_bits) {
  state->enable_pcan = config->enable_pcan;
  if (!state->enable_pcan) {
    return 1;
  }
  state->noise_estimate = noise_estimate;
  state->num_channels = num_channels;
  state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t));
  if (state->gain_lut == NULL) {
    fprintf(stderr, "Failed to allocate gain LUT\n");
    return 0;
  }
  state->snr_shift = config->gain_bits - input_correction_bits - kPcanSnrBits;

  const int32_t input_bits = smoothing_bits - input_correction_bits;
  state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0);
  state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1);
  state->gain_lut -= 6;
  int interval;
  for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
    const uint32_t x0 = (uint32_t)1 << (interval - 1);
    const uint32_t x1 = x0 + (x0 >> 1);
    const uint32_t x2 =
        (interval == kWideDynamicFunctionBits) ? x0 + (x0 - 1) : 2 * x0;

    const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0);
    const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1);
    const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2);

    const int32_t diff1 = (int32_t)y1 - y0;
    const int32_t diff2 = (int32_t)y2 - y0;
    const int32_t a1 = 4 * diff1 - diff2;
    const int32_t a2 = diff2 - a1;

    state->gain_lut[4 * interval] = y0;
    state->gain_lut[4 * interval + 1] = (int16_t)a1;
    state->gain_lut[4 * interval + 2] = (int16_t)a2;
  }
  state->gain_lut += 6;
  return 1;
}

void PcanGainControlFreeStateContents(struct PcanGainControlState* state) {
  free(state->gain_lut);
}
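A short reading of the table construction above (explanatory, not from the removed file): for each octave interval [2^(k-1), 2^k) the LUT stores three values y0, a1 = 4*(y1 - y0) - (y2 - y0) and a2 = (y2 - y0) - a1, sampled from the gain curve g(x) = 2^gain_bits * (x / 2^input_bits + offset)^(-strength) at the start, midpoint and end of the interval. WideDynamicFunction then evaluates the quadratic y0 + a1*t + a2*t^2 with t = frac / 1024, which reproduces g exactly at t = 0, 1/2 and 1 and interpolates in between.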
@@ -1,57 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_

#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"

#define kWideDynamicFunctionBits 32
#define kWideDynamicFunctionLUTSize (4 * kWideDynamicFunctionBits - 3)

#ifdef __cplusplus
extern "C" {
#endif

struct PcanGainControlConfig {
  // set to false (0) to disable this module
  int enable_pcan;
  // gain normalization exponent (0.0 disables, 1.0 full strength)
  float strength;
  // positive value added in the normalization denominator
  float offset;
  // number of fractional bits in the gain
  int gain_bits;
};

void PcanGainControlFillConfigWithDefaults(
    struct PcanGainControlConfig* config);

int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
                               int32_t input_bits, uint32_t x);

int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
                                 struct PcanGainControlState* state,
                                 uint32_t* noise_estimate,
                                 const int num_channels,
                                 const uint16_t smoothing_bits,
                                 const int32_t input_correction_bits);

void PcanGainControlFreeStateContents(struct PcanGainControlState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
@@ -1,70 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/window.h"

#include <string.h>

int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
                         size_t num_samples, size_t* num_samples_read) {
  const int size = state->size;

  // Copy samples from the samples buffer over to our local input.
  size_t max_samples_to_copy = state->size - state->input_used;
  if (max_samples_to_copy > num_samples) {
    max_samples_to_copy = num_samples;
  }
  memcpy(state->input + state->input_used, samples,
         max_samples_to_copy * sizeof(*samples));
  *num_samples_read = max_samples_to_copy;
  state->input_used += max_samples_to_copy;

  if (state->input_used < state->size) {
    // We don't have enough samples to compute a window.
    return 0;
  }

  // Apply the window to the input.
  const int16_t* coefficients = state->coefficients;
  const int16_t* input = state->input;
  int16_t* output = state->output;
  int i;
  int16_t max_abs_output_value = 0;
  for (i = 0; i < size; ++i) {
    int16_t new_value =
        (((int32_t)*input++) * *coefficients++) >> kFrontendWindowBits;
    *output++ = new_value;
    if (new_value < 0) {
      new_value = -new_value;
    }
    if (new_value > max_abs_output_value) {
      max_abs_output_value = new_value;
    }
  }
  // Shuffle the input down by the step size, and update how much we have used.
  memmove(state->input, state->input + state->step,
          sizeof(*state->input) * (state->size - state->step));
  state->input_used -= state->step;
  state->max_abs_output_value = max_abs_output_value;

  // Indicate that the output buffer is valid for the next stage.
  return 1;
}

void WindowReset(struct WindowState* state) {
  memset(state->input, 0, state->size * sizeof(*state->input));
  memset(state->output, 0, state->size * sizeof(*state->output));
  state->input_used = 0;
  state->max_abs_output_value = 0;
}
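Note (illustrative, not part of the removed file): the windowing loop above works in Q12 fixed point (kFrontendWindowBits == 12), so a coefficient of 4096 represents 1.0. A minimal sketch of the core scaling step with assumed example values:

// Sketch only: a coefficient of 2048 is 0.5 in Q12, so a sample of 1000
// becomes (1000 * 2048) >> 12 == 500, exactly as in the loop above.
#include <cstdint>
int16_t WindowOneSample(int16_t sample, int16_t coefficient) {
  return (int16_t)(((int32_t)sample * coefficient) >> 12);
}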
@@ -1,49 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_

#include <stdint.h>
#include <stdlib.h>

#define kFrontendWindowBits 12

#ifdef __cplusplus
extern "C" {
#endif

struct WindowState {
  size_t size;
  int16_t* coefficients;
  size_t step;

  int16_t* input;
  size_t input_used;
  int16_t* output;
  int16_t max_abs_output_value;
};

// Applies a window to the samples coming in, stepping forward at the given
// rate.
int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
                         size_t num_samples, size_t* num_samples_read);

void WindowReset(struct WindowState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
@@ -1,73 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Some platforms don't have M_PI
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

void WindowFillConfigWithDefaults(struct WindowConfig* config) {
  config->size_ms = 25;
  config->step_size_ms = 10;
}

int WindowPopulateState(const struct WindowConfig* config,
                        struct WindowState* state, int sample_rate) {
  state->size = config->size_ms * sample_rate / 1000;
  state->step = config->step_size_ms * sample_rate / 1000;

  state->coefficients = malloc(state->size * sizeof(*state->coefficients));
  if (state->coefficients == NULL) {
    fprintf(stderr, "Failed to allocate window coefficients\n");
    return 0;
  }

  // Populate the window values.
  const float arg = M_PI * 2.0 / ((float)state->size);
  int i;
  for (i = 0; i < state->size; ++i) {
    float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5)));
    // Scale it to fixed point and round it.
    state->coefficients[i] =
        floor(float_value * (1 << kFrontendWindowBits) + 0.5);
  }

  state->input_used = 0;
  state->input = malloc(state->size * sizeof(*state->input));
  if (state->input == NULL) {
    fprintf(stderr, "Failed to allocate window input\n");
    return 0;
  }

  state->output = malloc(state->size * sizeof(*state->output));
  if (state->output == NULL) {
    fprintf(stderr, "Failed to allocate window output\n");
    return 0;
  }

  return 1;
}

void WindowFreeStateContents(struct WindowState* state) {
  free(state->coefficients);
  free(state->input);
  free(state->output);
}
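Note (illustrative, not part of the removed file): with the defaults above and an assumed 16 kHz sample rate, the frame is 25 ms * 16000 / 1000 = 400 samples with a 160-sample step, and the Hann coefficients are stored in Q12 (so values near the centre of the frame come out as about 4096, i.e. 1.0). A minimal sketch of the coefficient formula:

// Sketch only: the Q12 Hann coefficient for sample i of an N-sample window,
// mirroring the loop in WindowPopulateState above.
#include <cmath>
#include <cstdint>
int16_t HannCoefficientQ12(int i, int size) {
  const double kPi = 3.14159265358979323846;
  const double value = 0.5 - 0.5 * std::cos(2.0 * kPi * (i + 0.5) / size);
  return (int16_t)std::floor(value * 4096 + 0.5);
}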
@@ -1,45 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_

#include "tensorflow/lite/experimental/microfrontend/lib/window.h"

#ifdef __cplusplus
extern "C" {
#endif

struct WindowConfig {
  // length of window frame in milliseconds
  size_t size_ms;
  // length of step for next frame in milliseconds
  size_t step_size_ms;
};

// Populates the WindowConfig with "sane" default values.
void WindowFillConfigWithDefaults(struct WindowConfig* config);

// Allocates any buffers.
int WindowPopulateState(const struct WindowConfig* config,
                        struct WindowState* state, int sample_rate);

// Frees any allocated buffers.
void WindowFreeStateContents(struct WindowState* state);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
File diff suppressed because it is too large
@@ -1,122 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_

#include <cstdint>

#include "tensorflow/lite/kernels/op_macros.h"

#ifndef TFLITE_DCHECK
#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

#ifndef TFLITE_DCHECK_EQ
#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

#ifndef TFLITE_DCHECK_NE
#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

#ifndef TFLITE_DCHECK_GE
#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

#ifndef TFLITE_DCHECK_GT
#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

#ifndef TFLITE_DCHECK_LE
#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

#ifndef TFLITE_DCHECK_LT
#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif

// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
#ifndef TFLITE_CHECK
#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
#endif

#ifndef TFLITE_CHECK_EQ
#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
#endif

#ifndef TFLITE_CHECK_NE
#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
#endif

#ifndef TFLITE_CHECK_GE
#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
#endif

#ifndef TFLITE_CHECK_GT
#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
#endif

#ifndef TFLITE_CHECK_LE
#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
#endif

#ifndef TFLITE_CHECK_LT
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
#endif

#ifndef TF_LITE_STATIC_MEMORY
// TODO(b/162019032): Consider removing these type-aliases.
using int8 = std::int8_t;
using uint8 = std::uint8_t;
using int16 = std::int16_t;
using uint16 = std::uint16_t;
using int32 = std::int32_t;
using uint32 = std::uint32_t;
#endif  // !defined(TF_LITE_STATIC_MEMORY)

// Allow for cross-compiler usage of function signatures - currently used for
// specifying named RUY profiler regions in templated methods.
#if defined(_MSC_VER)
#define TFLITE_PRETTY_FUNCTION __FUNCSIG__
#elif defined(__GNUC__)
#define TFLITE_PRETTY_FUNCTION __PRETTY_FUNCTION__
#else
#define TFLITE_PRETTY_FUNCTION __func__
#endif

// TFLITE_DEPRECATED()
//
// Duplicated from absl/base/macros.h to avoid pulling in that library.
// Marks deprecated class, struct, enum, function, method and variable
// declarations. The macro argument is used as a custom diagnostic message (e.g.
// suggestion of a better alternative).
//
// Example:
//
//   class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
//   TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
//
// Every usage of a deprecated entity will trigger a warning when compiled with
// clang's `-Wdeprecated-declarations` option. This option is turned off by
// default, but the warnings will be reported by clang-tidy.
#if defined(__clang__) && __cplusplus >= 201103L
#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
#endif

#ifndef TFLITE_DEPRECATED
#define TFLITE_DEPRECATED(message)
#endif

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
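Note (illustrative, not part of the removed file): a minimal sketch of how the check macros above are used inside kernels; TFLITE_ASSERT_FALSE and TFLITE_ABORT come from op_macros.h, and the exact abort behaviour is platform-dependent:

#include "tensorflow/lite/kernels/internal/compatibility.h"

// Sketch only: a debug-style check and an always-on check guarding kernel
// assumptions before any tensor data is touched.
void CheckDims(int input_size, int output_size) {
  TFLITE_DCHECK_EQ(input_size, output_size);
  TFLITE_CHECK_GT(input_size, 0);
}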
@@ -1,40 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_

#include <cmath>

namespace tflite {

#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__)
#define TF_LITE_GLOBAL_STD_PREFIX
#else
#define TF_LITE_GLOBAL_STD_PREFIX std
#endif

#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \
  template <class T>                                  \
  inline T tf_name(const T x) {                       \
    return TF_LITE_GLOBAL_STD_PREFIX::std_name(x);    \
  }

DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);
DECLARE_STD_GLOBAL_SWITCH1(TfLiteExpm1, expm1);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
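Note (illustrative, not part of the removed file): the macro above expands into a small template, so tflite::TfLiteRound and tflite::TfLiteExpm1 dispatch to either the global or the std:: cmath functions depending on the platform defines. A minimal usage sketch:

#include "tensorflow/lite/kernels/internal/cppmath.h"

// Sketch only: round() rounds halves away from zero, so this returns 3.0.
double RoundExample() { return tflite::TfLiteRound(2.5); }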
@@ -1,35 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_

#include <cmath>

namespace tflite {

#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
inline float TfLiteMax(const float& x, const float& y) {
  return std::max(x, y);
}
#else
template <class T>
inline T TfLiteMax(const T& x, const T& y) {
  return std::fmax(x, y);
}
#endif

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
@@ -1,35 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_

#include <cmath>

namespace tflite {

#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
inline float TfLiteMin(const float& x, const float& y) {
  return std::min(x, y);
}
#else
template <class T>
inline T TfLiteMin(const T& x, const T& y) {
  return std::fmin(x, y);
}
#endif

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
@@ -1,20 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_

// TFLM does not need to utilize any Neon optimizations.

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
@@ -1,122 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_

#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

inline RuntimeShape GetTensorShape(std::vector<int32_t> data) {
  return RuntimeShape(data.size(), data.data());
}

// A list of tensors in a format that can be used by kernels like split and
// concatenation.
template <typename T>
class VectorOfTensors {
 public:
  // Build with the tensors in 'tensor_list'.
  VectorOfTensors(const TfLiteContext& context,
                  const TfLiteIntArray& tensor_list) {
    int num_tensors = tensor_list.size;

    all_data_.reserve(num_tensors);
    all_shape_.reserve(num_tensors);
    all_shape_ptr_.reserve(num_tensors);

    for (int i = 0; i < num_tensors; ++i) {
      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
      all_data_.push_back(GetTensorData<T>(t));
      all_shape_.push_back(GetTensorShape(t));
    }

    // Taking the pointer from inside a std::vector is only OK if the vector is
    // never modified, so we populate all_shape in the previous loop and then we
    // are free to grab iterators here.
    for (int i = 0; i < num_tensors; ++i) {
      all_shape_ptr_.push_back(&all_shape_[i]);
    }
  }
  // Return a pointer to the data pointers of all tensors in the list. For
  // example:
  //   float* const* f = v.data();
  //   f[0][1] is the second element of the first tensor.
  T* const* data() const { return all_data_.data(); }

  // Return a pointer to the shape pointers of all tensors in the list. For
  // example:
  //   const RuntimeShape* const* d = v.dims();
  //   d[1] are the dimensions of the second tensor in the list.
  const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }

 private:
  std::vector<T*> all_data_;
  std::vector<RuntimeShape> all_shape_;
  std::vector<RuntimeShape*> all_shape_ptr_;
};

// A list of quantized tensors in a format that can be used by kernels like
// split and concatenation.
class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t> {
 public:
  // Build with the tensors in 'tensor_list'.
  VectorOfQuantizedTensors(const TfLiteContext& context,
                           const TfLiteIntArray& tensor_list)
      : VectorOfTensors<uint8_t>(context, tensor_list) {
    for (int i = 0; i < tensor_list.size; ++i) {
      TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
      zero_point_.push_back(t->params.zero_point);
      scale_.push_back(t->params.scale);
    }
  }

  const float* scale() const { return scale_.data(); }
  const int32_t* zero_point() const { return zero_point_.data(); }

 private:
  std::vector<int32_t> zero_point_;
  std::vector<float> scale_;
};

// Writes randomly accessed values from `input` sequentially into `output`.
template <typename T>
class SequentialTensorWriter {
 public:
  SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) {
    input_data_ = GetTensorData<T>(input);
    output_ptr_ = GetTensorData<T>(output);
  }
  SequentialTensorWriter(const T* input_data, T* output_data)
      : input_data_(input_data), output_ptr_(output_data) {}

  void Write(int position) { *output_ptr_++ = input_data_[position]; }
  void WriteN(int position, int len) {
    memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
    output_ptr_ += len;
  }

 private:
  const T* input_data_;
  T* output_ptr_;
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
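Note (illustrative, not part of the removed file): a minimal sketch of SequentialTensorWriter using its raw-pointer constructor; the class simply copies the selected positions of the input into the output in sequence, which is how slice/gather-style kernels use it:

#include "tensorflow/lite/kernels/internal/portable_tensor.h"

// Sketch only: gathers src[3], then src[0..1], into dst in order.
void GatherExample() {
  const float src[4] = {1.f, 2.f, 3.f, 4.f};
  float dst[3] = {0.f, 0.f, 0.f};
  tflite::SequentialTensorWriter<float> writer(src, dst);
  writer.Write(3);      // dst[0] = 4.f
  writer.WriteN(0, 2);  // dst[1] = 1.f, dst[2] = 2.f
}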
@@ -1,484 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif

namespace tflite {

namespace tensor_utils {

// Multiplies a matrix with a scalar and reduce the result on each row to a
// scalar.
// Parameters:
//     - matrix: matrix of size n_row * n_col
//     - scalar: the scalar that is multiplied to each element in the matrix
//     - n_row: the row count of the matrix
//     - n_col: the column count of the matrix
//     - output: the 32bit output
// Note: We do not need saturation because the int8 * int8 is safe from overflow
// in (2^31-1) / (2^14) = 131072, which is bigger than the n_row. Non-zero
// initial output value is not exceptionally large.
void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
    int32_t n_row, int32_t n_col,
    int32_t* output);

// Add another vector for each batch in the batch vector.
template <typename T>
void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
    T* batch_vector) {
  for (int b = 0; b < n_batch; b++) {
    for (int i = 0; i < v_size; ++i) {
      batch_vector[i] += vector[i];
    }
    batch_vector += v_size;
  }
}

// Cwise product of two vectors.
template <typename T>
inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2,
    int v_size, T* result) {
  for (int v = 0; v < v_size; v++) {
    *result++ = *vector1++ * *vector2++;
  }
}

// Cwise product of a vector and a batch-vector.
template <typename T>
inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size,
    const T* batch_vector, int n_batch,
    T* result) {
  for (int b = 0; b < n_batch; b++) {
    VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
    // Update the pointers.
    result += v_size;
    batch_vector += v_size;
  }
}

// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
// assumption here is that result array is initialized to valid values.
template <typename T>
inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
    const T* __restrict__ vector2,
    int v_size,
    T* __restrict__ result) {
  for (int v = 0; v < v_size; v++) {
    *result++ += *vector1++ * *vector2++;
  }
}

// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
// operation, the assumption here is that result array is initialized to valid
// values.
template <typename T>
inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size,
    const T* batch_vector,
    int n_batch, T* result) {
  for (int b = 0; b < n_batch; b++) {
    VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
    // Update the pointers.
    result += v_size;
    batch_vector += v_size;
  }
}

// Batch vector initialization with another vector.
template <typename T>
void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
    T* batch_vector) {
  for (int b = 0; b < n_batch; b++) {
    std::copy_n(vector, v_size, batch_vector + b * v_size);
  }
}

// Checks if all entries of vector are zero for float.
bool IsZeroVector(const float* vector, int v_size);

// Checks if all entries of vector are zero for int8.
bool IsZeroVector(const int8_t* vector, int v_size);

// Quantizes a buffer of floating point values using a symmetric quantization
// (i.e. linear quantization without an offset) to 8-bit signed integers.
// It also outputs the range (min, max) of the floating point buffer, and the
// scaling factor used to quantize the values.
void SymmetricQuantizeFloats(const float* values, const int size,
    int8_t* quantized_values, float* min_value,
    float* max_value, float* scaling_factor);

// Quantizes a buffer of floating point values using a symmetric quantization
// (i.e. linear quantization without an offset) to 8-bit signed integers.
// It uses the range (min, max) provided to the function to calculate the
// appropriate scaling factor to quantize the values.
void SymmetricQuantizeFloats(const float* values, const int size,
    int8_t* quantized_values, float min_value,
    float max_value, float* scaling_factor);

void AsymmetricQuantizeFloats(const float* values, const int size,
    int8_t* quantized_values, float* scaling_factor,
    int32_t* offset);

// Helper function to quantize floats.
// float_data_ptr     input float vectors
// n_batch            number of input vectors
// n_data             size of a single input vector
// quantized_data_ptr (out) vector with quantized data
// scaling_factors    (out) scaling factors (one per vector)
// zero_points        (out) zero points (one per vector)
// do_asymmetric      controls if the quantization should be asymmetric.
inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
    int n_data, int8_t* quantized_data_ptr,
    float* scaling_factors, int32_t* zero_points,
    bool do_asymmetric) {
  for (int b = 0; b < n_batch; ++b) {
    const int offset = b * n_data;
    if (do_asymmetric) {
      tensor_utils::AsymmetricQuantizeFloats(
          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
          &scaling_factors[b], &zero_points[b]);
    } else {
      float unused_min, unused_max;
      tensor_utils::SymmetricQuantizeFloats(
          float_data_ptr + offset, n_data, quantized_data_ptr + offset,
          &unused_min, &unused_max, &scaling_factors[b]);
    }
  }
}

// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
// dimension composed by input vectors independent from each other). The result
// of the multiplication is accumulated to the passed result buffer.
// More specifically, for a matrix M of shape [n, i] and a batched-vector
// of shape [i, batch] it will first compute the product of shape [n, batch].
// This product will be accumulated to the result buffer.
void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
    int m_cols, const float* vector,
    int n_batch, float* result);

// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x4.
// This function assumes that m_cols is a multiple of the block size (4 in this
// case) so that there's no incomplete block.
void SparseMatrixBatchVectorMultiplyAccumulate1x4(
    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const float* __restrict__ vector, int n_batch, float* __restrict__ result);

// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
//   1. A matrix array stores non-zero blocks of the matrix in row major.
//   2. A ledger array stores nrows groups, one group per row. Each group starts
//      with an integer representing the number of non-zero blocks for the
//      corresponding row and follows with column indexes of the first element
//      of each non-zero block.
// This function assumes that
//   1. m_cols is a multiple of 16 so that all blocks are full blocks.
//   2. m_cols < 254 * 16 so that block index can be represented by uint8.
void SparseMatrixBatchVectorMultiplyAccumulate(
    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
    float* __restrict__ result);

// Same as the function above, but for values quantized using symmetric
// quantization (e.g. by calling SymmetricQuantizeFloats).
// The passed scaling factors is a buffer of the quantization scaling factors
// that will be used to dequantize the products into the final result buffer.
// These scaling factors are the multiplication of the matrix scaling factor
// by the vector's scaling factor, one per batch (i.e. this allows quantizing
// each batch in the batch-vector matrix independently).
void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors,
    const float* __restrict__ scaling_factors, int n_batch,
    float* __restrict__ result);

// Same as the function above except that vector values
// are quantized with asymmetric quantization per-batch and the matrix
// is quantized per row.
void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors,
    const float* __restrict__ scaling_factors, int n_batch,
    float* __restrict__ result, const float* __restrict__ per_channel_scale,
    const int32_t* __restrict__ input_offset);

// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x16.
// This function assumes that m_cols is a multiple of the block size (16 in this
// case) so that there's no incomplete block. Also, it assumes all offsets of
// input, output and filter are zero.
void SparseMatrixBatchVectorMultiplyAccumulate1x16(
    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
    const int32_t output_shift, const int32_t output_offset,
    const int32_t output_activation_min, const int32_t output_activation_max,
    int8_t* __restrict__ result);

// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
//   1. A matrix array stores non-zero blocks of the matrix in row major.
//   2. A ledger array stores nrows groups, one group per row. Each group starts
//      with an integer representing the number of non-zero blocks for the
//      corresponding row followed by column index of the first element of
//      each non-zero block.
// This function assumes that
//   1. m_cols is a multiple of 16 so that all blocks are full blocks.
//   2. m_cols < 254 * 16 so that block index can be represented by uint8.
void SparseMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const uint8_t* __restrict__ ledger,
    const int m_rows, const int m_cols, const int8_t* __restrict__ vectors,
    const float* __restrict__ scaling_factors, int n_batch,
    float* __restrict__ result);

// Same as the above 8, 8, 8 integer matmul except for the presence of zero
// point and non-accumulative.
// TODO(b/148688698): remove this function by folding zero point calculation in
// prepare() function.
void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
    const int8_t* input_to_gate_weights,
    int32_t input_to_gate_effective_scale_a,
    int32_t input_to_gate_effective_scale_b,
    int32_t n_batch, int32_t n_input, int32_t n_cell,
    int8_t* gate_output, int8_t gate_output_zp);

// Same as above but has 16 bit and 8 bit input and 8 bit output.
// Used in projection when hidden is 16bit.
void MatrixBatchVectorMultiply(const int16_t* hidden,
    const int8_t* hidden_to_output_weights,
    int32_t proj_effective_scale_a,
    int32_t proj_effective_scale_b,
    const int32_t* gate_bias, int32_t n_batch,
    int32_t n_hidden, int32_t n_output,
    int32_t output_zp, int8_t* proj_output);

// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized
// vector.
// Parameters:
//     - input: batch vector of size n_batch * n_input; 16 bit.
//     - layer_norm_weights: the quantized layer normalization weights.
//     - bias: the bias for the layer normalization.
//     - layer_norm_scale_a: multiplier for scale factor.
//     - layer_norm_scale_b: shift for scale factor.
//     - variance_limit: the guard to make sure the inverse does not overflow.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - output: the 16 bit output
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
    const int32_t* bias, int32_t layer_norm_scale_a,
    int32_t layer_norm_scale_b, int32_t variance_limit,
    int n_batch, int n_input, int16_t* output);

// Same as above but the internal calculation is done in float.
void ApplyLayerNormFloat(const int16_t* input,
    const int16_t* layer_norm_weights,
    int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
    const int32_t* bias, int n_batch, int n_input,
    int16_t* output);

// Apply Sigmoid to a quantized vector.
// Parameters:
//     - input: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - output: the 16 bit output
// The input is in Q3.12 format and the output is in Q0.15 format.
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
    int16_t* output);

// Same as above but the internal calculation is float.
void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
    int16_t* output);

// Apply Tanh to a quantized vector.
// Parameters:
//     - integer_bits: the integer bits of the input.
//                     Currently supports 0, 1, 2, 3, 4, 5, 6.
//     - input: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - output: the 16 bit output
// The input is in Qm.15-m format and the output is in Q0.15 format.
void ApplyTanh(int32_t intger_bits, const int16_t* input, int32_t n_batch,
    int32_t n_input, int16_t* output);

// Apply Tanh to a quantized vector. The internal calculation is in float.
//    - Input has 2^(integer_bits) as scale.
//    - Output has Q0.15 as scale.
void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
    int32_t integer_bits, int16_t* output);

// Element-wise multiplication of two quantized vectors.
// Parameters:
//     - input_1: batch vector of size n_batch * n_input; 16 bit.
//     - input_2: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - shift:   the shift needed to produce the output.
//     - output:  the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
    int n_input, int shift, int16_t* output);

// Element-wise multiplication of two quantized vectors.
// Parameters:
//     - input_1: batch vector of size n_batch * n_input; 16 bit.
//     - input_2: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - shift:   the shift needed to produce the output.
//     - output:  the 8 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
    int n_input, int shift, int8_t* output);

// Element-wise multiplication of two quantized vectors with rescaling.
// Parameters:
//     - input_1:    batch vector of size n_batch * n_input; 16 bit.
//     - input_2:    batch vector of size n_batch * n_input; 16 bit.
//     - multiplier: the multiplier part of scale.
//     - shift:      the shift part of scale.
//     - n_batch:    the number of batches.
//     - n_input:    the size for input and output.
//     - output:     the 8 bit output of size n_batch * n_input.
//     - output_zp:  the zero point of output.
// Output does not need to be initialized.
// Multiplier ("m") and shift ("s") are connected to the scale by
// scale = m * 2^(s - 31).
void CwiseMul(const int16_t* input_1, const int16_t* input_2,
    int32_t multiplier, int32_t shift, int32_t n_batch,
    int32_t n_input, int32_t output_zp, int8_t* output);

// Element-wise saturating addition of two quantized vectors without rescaling.
// Parameters:
//     - input_1: batch vector of size n_batch * n_input; 16 bit.
//     - input_2: batch vector of size n_batch * n_input; 16 bit.
//     - n_batch: the number of batches.
//     - n_input: the size for input and output.
//     - output:  the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
    int n_input, int16_t* output);

// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
// int8_t. Parameters:
//     - vector:         vector of size v_size.
//     - v_size:         the size of the vector.
//     - clipping_value: the value used for clipping.
void CwiseClipping(float* vector, const int v_size, const float clipping_value);
void CwiseClipping(int16_t* vector, const int v_size,
    const int16_t clipping_value);
void CwiseClipping(int8_t* vector, const int v_size,
    const int8_t clipping_value);

// Dot product of two vectors.
float VectorVectorDotProduct(const float* vector1, const float* vector2,
    int v_size);

// Dot product of two batch vectors of size n_batch * v_size:
// vector1 = [x_1_1, x_1_2, ..., x_1_vsize,
//            x_2_1, x_2_2, ..., x_2_vsize,
//            ...
//            x_nbatch_1,..., x_nbatch_vsize]
// vector2 = [y_1_1, y_1_2, ..., y_1_vsize,
//            y_2_1, y_2_2, ..., y_2_vsize,
//            ...
//            y_nbatch_1,..., y_nbatch_vsize]
// Then result will be a vector of n_batch size starting from 'result':
// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize,
//  x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize,
//  ...
//  x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize]
template <typename T>
inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2,
    int v_size, int n_batch,
    T* result) {
  for (int b = 0; b < n_batch; b++) {
    result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
    vector1 += v_size;
    vector2 += v_size;
  }
}

// Same as above but input is 16bit and output is 32bit.
void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
    const int16_t* vector2, int v_size,
    int n_batch, int32_t* result);

// Same as above, but inputs are 16bit integer and output is 16bit integer.
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
    const int16_t* batch_vector,
    int n_batch, int32_t multiplier,
    int shift, int16_t* result);

// Compute "1.0f - elements of vector" (used in CIFG).
void Sub1Vector(const float* vector, int v_size, float* result);

// Compute "1.0f - elements of vector" (used in CIFG) for int16 input.
// "vector" has range [0, 32767] because it is the output of sigmoid function.
void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);

// Multiply all elements of vector with a scalar.
void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
    float* result);

// Reduce-sum on a float input vector:
// input_vector: float pointer to input vector.
// output_vector: float pointer to vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
void ReductionSumVector(const float* input_vector, float* output_vector,
    int output_size, int reduction_size);

// Same as above but input/output is 32 bit integer.
void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
    int output_size, int reduction_size);

// Same as above but input is 8 bit integer.
void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
    int output_size, int reduction_size);

// Layer norm for each batch.
void MeanStddevNormalization(const float* input_vector, float* output_vector,
    int v_size, int n_batch);

// Saturate Add with rescale on both inputs.
void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
    const int8_t* recurrent, int8_t recurrent_zp,
    int32_t input_effective_scale_a,
    int32_t input_effective_scale_b,
    int32_t recurrent_effective_scale_a,
    int32_t recurrent_effective_scale_b, int32_t n_batch,
    int32_t n_cell, int16_t* output);

}  // namespace tensor_utils

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
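Note (illustrative, not part of the removed file): several of the helpers above are defined inline as plain loops, so they can be exercised directly. A minimal sketch using VectorVectorCwiseProductAccumulate, which computes result[i] += vector1[i] * vector2[i]:

#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"

// Sketch only: acc ends up as {4, 10, 18}.
void CwiseExample() {
  const float v1[3] = {1.f, 2.f, 3.f};
  const float v2[3] = {4.f, 5.f, 6.f};
  float acc[3] = {0.f, 0.f, 0.f};
  tflite::tensor_utils::VectorVectorCwiseProductAccumulate(v1, v2, 3, acc);
}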
@@ -1,416 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/quantization_util.h"

#include <algorithm>
#include <cmath>
#include <limits>

#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"

namespace tflite {

namespace {
// These constants are used to manipulate the binary representation of doubles.
// Double-precision binary64 floating point format is:
// Bit |  63  |  62-52   |   51-0   |
//     | Sign | Exponent | Fraction |
// To avoid 64-bit integers as much as possible, I break this into high and
// low 32-bit chunks. High is:
// Bit |  31  |  30-20   |      19-0     |
//     | Sign | Exponent | High Fraction |
// Low is:
// Bit |     31-0     |
//     | Low Fraction |
// We then access the components through logical bit-wise operations to
// extract the parts needed, with the positions and masks derived from the
// layout shown above.
constexpr uint64_t kSignMask = 0x8000000000000000LL;
constexpr uint64_t kExponentMask = 0x7ff0000000000000LL;
constexpr int32_t kExponentShift = 52;
constexpr int32_t kExponentBias = 1023;
constexpr uint32_t kExponentIsBadNum = 0x7ff;
constexpr uint64_t kFractionMask = 0x000fffffffc00000LL;
constexpr uint32_t kFractionShift = 22;
constexpr uint32_t kFractionRoundingMask = 0x003fffff;
constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
}  // namespace

void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                        int* shift) {
#if TFLITE_SINGLE_ROUNDING
  // Single-rounding MultiplyByQuantizedMultiplier only supports positive
  // multipliers.
  // TFLITE_DCHECK(double_multiplier >= 0);
#endif
  if (double_multiplier == 0.) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
#ifdef TFLITE_EMULATE_FLOAT
  // If we're trying to avoid the use of floating-point instructions (for
  // example on microcontrollers) then use an alternative implementation
  // that only requires integer and bitwise operations. To enable this, you
  // need to set the define during the build process for your platform.
  int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
#else   // TFLITE_EMULATE_FLOAT
  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1LL << 31)));
#endif  // TFLITE_EMULATE_FLOAT
  TFLITE_CHECK(q_fixed <= (1LL << 31));
  if (q_fixed == (1LL << 31)) {
    q_fixed /= 2;
    ++*shift;
  }
  TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
  // A shift amount smaller than -31 would cause all bits to be shifted out
  // and thus all results would be zero. We implement that instead with
  // q_fixed==0, so as to avoid hitting issues with right-shift
  // operations with shift amounts greater than 31. Note that this happens
  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
  // that we're effectively flushing tiny double_multiplier's to zero.
  // We could conceivably handle values in the range (roughly) [32, 63]
  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
  // the present handling is just doing 'flush denormals to zero'. We could
  // reconsider and actually generate nonzero denormals if a need arises.
  if (*shift < -31) {
    *shift = 0;
    q_fixed = 0;
  }
#if TFLITE_SINGLE_ROUNDING
  // Single-rounding MultiplyByQuantizedMultiplier doesn't support a shift > 30,
  // saturate it.
  if (*shift > 30) {
    *shift = 30;
    q_fixed = (1LL << 31) - 1;
  }
#endif
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

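A rough usage sketch (editorial, values illustrative): QuantizeMultiplier() above splits a real multiplier into a Q0.31 significand plus a power-of-two shift, so that a product can later be formed with integer operations only.

// Illustrative fragment (assumes quantization_util.h is included):
int32_t quantized_multiplier;
int shift;
tflite::QuantizeMultiplier(0.000244140625, &quantized_multiplier, &shift);
// 0.000244140625 == 0.5 * 2^-11, and 0.5 in Q0.31 is 1 << 30, so this yields
// quantized_multiplier == 1 << 30 and shift == -11; the original value can be
// recovered as quantized_multiplier * 2^(shift - 31).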
void QuantizeMultiplierGreaterThanOne(double double_multiplier,
                                      int32_t* quantized_multiplier,
                                      int* left_shift) {
  TFLITE_CHECK_GT(double_multiplier, 1.);
  QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift);
  TFLITE_CHECK_GE(*left_shift, 0);
}

void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
                                         int32_t* quantized_multiplier,
                                         int* left_shift) {
  TFLITE_CHECK_LT(double_multiplier, 1.);
  TFLITE_CHECK_GT(double_multiplier, 0.);
  int shift;
  QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
  TFLITE_CHECK_LE(shift, 0);
  *left_shift = shift;
}

int64_t IntegerFrExp(double input, int* shift) {
  // Make sure our assumptions about the double layout hold.
  TFLITE_CHECK_EQ(8, sizeof(double));

  // We want to access the bits of the input double value directly, which is
  // tricky to do safely, so use a union to handle the casting.
  union {
    double double_value;
    uint64_t double_as_uint;
  } cast_union;
  cast_union.double_value = input;
  const uint64_t u = cast_union.double_as_uint;

  // If the bitfield is all zeros apart from the sign bit, this is a normalized
  // zero value, so return standard values for this special case.
  if ((u & ~kSignMask) == 0) {
    *shift = 0;
    return 0;
  }

  // Deal with NaNs and Infs, which are always indicated with a fixed pattern in
  // the exponent, and distinguished by whether the fractions are zero or
  // non-zero.
  const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift);
  if (exponent_part == kExponentIsBadNum) {
    *shift = std::numeric_limits<int>::max();
    if (u & kFractionMask) {
      // NaN, so just return zero (with the exponent set to INT_MAX).
      return 0;
    } else {
      // Infinity, so return +/- INT_MAX.
      if (u & kSignMask) {
        return std::numeric_limits<int64_t>::min();
      } else {
        return std::numeric_limits<int64_t>::max();
      }
    }
  }

  // The shift is fairly easy to extract from the high bits of the double value,
  // just by masking it out and applying a bias. The std::frexp() implementation
  // always returns values between 0.5 and 1.0 though, whereas the exponent
  // assumes 1.0 to 2.0 is the standard range, so I add on one to match that
  // interface.
  *shift = (exponent_part - kExponentBias) + 1;

  // There's an implicit high bit in the double format definition, so make sure
  // we include that at the top, and then reconstruct the rest of the fractional
  // value from the remaining fragments.
  int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift);

  // We're cutting off some bits at the bottom, so to exactly match the standard
  // frexp implementation here we'll apply rounding by adding one to the least
  // significant bit of the result if the discarded portion is over half of the
  // maximum.
  if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) {
    fraction += 1;
  }
  // Negate the fraction if the sign bit was set.
  if (u & kSignMask) {
    fraction *= -1;
  }

  return fraction;
}

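For orientation, an editorial worked example of the bit manipulation used by IntegerFrExp() above (the numbers follow directly from the masks defined earlier):

// For input = 1.5 the IEEE-754 bit pattern is 0x3FF8000000000000:
//   exponent_part = (bits & kExponentMask) >> kExponentShift = 0x3FF = 1023
//   *shift        = (1023 - kExponentBias) + 1 = 1
//   fraction      = 0x40000000 + ((bits & kFractionMask) >> kFractionShift)
//                 = 0x40000000 + 0x20000000 = 0x60000000
//                   (0.75 expressed as a proportion of 2^31)
// i.e. 1.5 is reported as 0.75 * 2^1, matching std::frexp(1.5, &exp).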
double DoubleFromFractionAndShift(int64_t fraction, int shift) {
  union {
    double double_value;
    uint64_t double_as_uint;
  } result;

  // Detect NaNs and infinities.
  if (shift == std::numeric_limits<int>::max()) {
    if (fraction == 0) {
      return std::numeric_limits<double>::quiet_NaN();
    } else if (fraction > 0) {
      return std::numeric_limits<double>::infinity();
    } else {
      return -std::numeric_limits<double>::infinity();
    }
  }

  // Return a normalized zero for a zero fraction.
  if (fraction == 0) {
    result.double_as_uint = 0;
    return result.double_value;
  }

  bool is_negative = (fraction < 0);
  int64_t encoded_fraction = is_negative ? -fraction : fraction;
  int64_t encoded_shift = (shift - 1);
  while (encoded_fraction < 0x40000000) {
    encoded_fraction *= 2;
    encoded_shift -= 1;
  }
  while (encoded_fraction > 0x80000000) {
    encoded_fraction /= 2;
    encoded_shift += 1;
  }
  encoded_fraction -= 0x40000000;
  if (encoded_shift < -1022) {
    encoded_shift = -1023;
  } else if (encoded_shift > 1022) {
    encoded_shift = 1023;
  }
  encoded_shift += kExponentBias;
  uint64_t encoded_sign = is_negative ? kSignMask : 0;
  result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) |
                          (encoded_fraction << kFractionShift);
  return result.double_value;
}

double IntegerDoubleMultiply(double a, double b) {
  int a_shift;
  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
  int b_shift;
  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
  // Detect NaNs and infinities.
  if (a_shift == std::numeric_limits<int>::max() ||
      (b_shift == std::numeric_limits<int>::max())) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  const int result_shift = a_shift + b_shift + 1;
  const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
  return DoubleFromFractionAndShift(result_fraction, result_shift);
}

int IntegerDoubleCompare(double a, double b) {
  int a_shift;
  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
  int b_shift;
  const int64_t b_fraction = IntegerFrExp(b, &b_shift);

  // Detect NaNs and infinities.
  if (a_shift == std::numeric_limits<int>::max() ||
      (b_shift == std::numeric_limits<int>::max())) {
    return 1;
  }

  if ((a_fraction == 0) && (b_fraction < 0)) {
    return 1;
  } else if ((a_fraction < 0) && (b_fraction == 0)) {
    return -1;
  } else if (a_shift < b_shift) {
    return -1;
  } else if (a_shift > b_shift) {
    return 1;
  } else if (a_fraction < b_fraction) {
    return -1;
  } else if (a_fraction > b_fraction) {
    return 1;
  } else {
    return 0;
  }
}

void PreprocessSoftmaxScaling(double beta, double input_scale,
                              int input_integer_bits,
                              int32_t* quantized_multiplier, int* left_shift) {
  // If the overall multiplier (input and beta) is large, then exp() of an
  // input difference of 1 scaled by this will be large.  In other words, we
  // can cap the multiplier and know that, when it is used, the output will be
  // (round to) zero wherever the input is not at the maximum value.

  // If the overall scale is less than one, and input_integer_bits=0, then the
  // result is double equivalent of Q0.31 (actually with more precision). Thus
  // this generates a Q(input_integer_bits).(31-input_integer_bits)
  // representation.
#if TFLITE_SINGLE_ROUNDING
  const double max_real_multiplier = (1LL << 30) - 1.0;
#else
  const double max_real_multiplier = (1LL << 31) - 1.0;
#endif

#ifdef TFLITE_EMULATE_FLOAT
  const double input_beta = IntegerDoubleMultiply(beta, input_scale);
  int shift;
  int64_t fraction = IntegerFrExp(input_beta, &shift);
  shift += (31 - input_integer_bits);
  double input_beta_real_multiplier =
      DoubleFromFractionAndShift(fraction, shift);
  if (IntegerDoubleCompare(input_beta_real_multiplier, max_real_multiplier) >
      0) {
    input_beta_real_multiplier = max_real_multiplier;
  }
#else   // TFLITE_EMULATE_FLOAT
  const double input_beta_real_multiplier =
      std::min<double>(beta * input_scale * (1 << (31 - input_integer_bits)),
                       max_real_multiplier);
#endif  // TFLITE_EMULATE_FLOAT

  QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
                                   quantized_multiplier, left_shift);
}

void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
                                    int input_integer_bits,
                                    int32_t* quantized_multiplier,
                                    int* left_shift,
                                    int32_t* reverse_scaling_divisor,
                                    int* reverse_scaling_left_shift) {
  PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
                           quantized_multiplier, left_shift);

  // Also calculate what amounts to the inverse scaling factor for the input.
  const double real_reverse_scaling_divisor =
      (1 << (31 - *left_shift)) / static_cast<double>(*quantized_multiplier);
  tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor,
                                              reverse_scaling_divisor,
                                              reverse_scaling_left_shift);
}

int CalculateInputRadius(int input_integer_bits, int input_left_shift,
                         int total_signed_bits) {
#ifdef TFLITE_EMULATE_FLOAT
  int64_t result = (1 << input_integer_bits) - 1;
  result <<= (total_signed_bits - input_integer_bits);
  result >>= input_left_shift;
  return result;
#else   // TFLITE_EMULATE_FLOAT
  const double max_input_rescaled =
      1.0 * ((1 << input_integer_bits) - 1) *
      (1LL << (total_signed_bits - input_integer_bits)) /
      (1LL << input_left_shift);
  // Tighten bound using floor.  Suppose that we could use the exact value.
  // After scaling the difference, the result would be at the maximum.  Thus we
  // must ensure that our value has lower magnitude.
  return static_cast<int>(std::floor(max_input_rescaled));
#endif  // TFLITE_EMULATE_FLOAT
}

void NudgeQuantizationRange(const float min, const float max,
                            const int quant_min, const int quant_max,
                            float* nudged_min, float* nudged_max,
                            float* nudged_scale) {
  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
  const float quant_min_float = static_cast<float>(quant_min);
  const float quant_max_float = static_cast<float>(quant_max);
  *nudged_scale = (max - min) / (quant_max_float - quant_min_float);
  const float zero_point_from_min = quant_min_float - min / *nudged_scale;
  uint16_t nudged_zero_point;
  if (zero_point_from_min < quant_min_float) {
    nudged_zero_point = static_cast<uint16_t>(quant_min);
  } else if (zero_point_from_min > quant_max_float) {
    nudged_zero_point = static_cast<uint16_t>(quant_max);
  } else {
    nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
  }
  *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
  *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
}

void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
                       const float nudged_max, const float* input_data,
                       float* output_data, const float size) {
  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
  const float inv_nudged_scale = 1.0f / nudged_scale;

  for (int i = 0; i < size; i++) {
    const float src_val = input_data[i];
    const float clamped = std::min(nudged_max, std::max(nudged_min, src_val));
    const float clamped_shifted = clamped - nudged_min;
    const float dst_val =
        TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale +
        nudged_min;
    output_data[i] = dst_val;
  }
}

bool CheckedLog2(const float x, int* log2_result) {
  // Using TfLiteRound instead of std::round and std::log instead of
  // std::log2 to work around these functions being missing in a toolchain
  // used in some TensorFlow tests as of May 2018.
  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
  const float x_log2_rounded = TfLiteRound(x_log2);
  const float x_log2_fracpart = x_log2 - x_log2_rounded;

  *log2_result = static_cast<int>(x_log2_rounded);
  return std::abs(x_log2_fracpart) < 1e-3f;
}

void QuantizeMultiplierArray(const double* effective_scales, size_t size,
                             int32_t* effective_scale_significand,
                             int* effective_shift) {
  for (size_t i = 0; i < size; ++i) {
    QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i],
                       &effective_shift[i]);
  }
}

}  // namespace tflite
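An editorial worked example of the nudging above (values illustrative): for min = -1.0f, max = 1.0f and an 8-bit range [0, 255], the scale is 2/255 and the raw zero point 127.5 rounds to 128, so real 0.0 lands exactly on a quantized value.

float nudged_min, nudged_max, nudged_scale;
tflite::NudgeQuantizationRange(-1.0f, 1.0f, 0, 255,
                               &nudged_min, &nudged_max, &nudged_scale);
// nudged_scale ~= 0.0078431 (= 2/255), nudged zero point = 128,
// nudged_min ~= -1.00392, nudged_max ~= 0.99608.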
@@ -1,292 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_

#include <cmath>
#include <cstdint>
#include <limits>

#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

// Given the min and max values of a float array, return
// reasonable quantization parameters to use for this array.
template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
                                            bool narrow_range) {
  const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
  const T qmax = std::numeric_limits<T>::max();
  const double qmin_double = qmin;
  const double qmax_double = qmax;
  // 0 should always be a representable value. Let's assume that the initial
  // min,max range contains 0.
  TFLITE_CHECK_LE(rmin, 0.);
  TFLITE_CHECK_GE(rmax, 0.);
  if (rmin == rmax) {
    // Special case where the min,max range is a point. Should be {0}.
    TFLITE_CHECK_EQ(rmin, 0.);
    TFLITE_CHECK_EQ(rmax, 0.);
    QuantizationParams quantization_params;
    quantization_params.zero_point = 0;
    quantization_params.scale = 0.;
    return quantization_params;
  }

  // General case.
  //
  // First determine the scale.
  const double scale = (rmax - rmin) / (qmax_double - qmin_double);

  // Zero-point computation.
  // First the initial floating-point computation. The zero-point can be
  // determined from solving an affine equation for any known pair
  // (real value, corresponding quantized value).
  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
  // The arithmetic error on the zero point computed from either pair
  // will be roughly machine_epsilon * (sum of absolute values of terms)
  // so we want to use the variant that adds the smaller terms.
  const double zero_point_from_min = qmin_double - rmin / scale;
  const double zero_point_from_max = qmax_double - rmax / scale;
  const double zero_point_from_min_error =
      std::abs(qmin_double) + std::abs(rmin / scale);
  const double zero_point_from_max_error =
      std::abs(qmax_double) + std::abs(rmax / scale);

  const double zero_point_double =
      zero_point_from_min_error < zero_point_from_max_error
          ? zero_point_from_min
          : zero_point_from_max;

  // Now we need to nudge the zero point to be an integer
  // (our zero points are integer, and this is motivated by the requirement
  // to be able to represent the real value "0" exactly as a quantized value,
  // which is required in multiple places, for example in Im2col with SAME
  // padding).
  T nudged_zero_point = 0;
  if (zero_point_double < qmin_double) {
    nudged_zero_point = qmin;
  } else if (zero_point_double > qmax_double) {
    nudged_zero_point = qmax;
  } else {
    nudged_zero_point = static_cast<T>(round(zero_point_double));
  }
  // The zero point should always be in the range of quantized value,
  // [qmin, qmax].
  TFLITE_CHECK_GE(nudged_zero_point, qmin);
  TFLITE_CHECK_LE(nudged_zero_point, qmax);

  // Finally, store the result nudged quantization params.
  QuantizationParams quantization_params;
  quantization_params.zero_point = nudged_zero_point;
  quantization_params.scale = scale;
  return quantization_params;
}

template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
  return ChooseQuantizationParams<T>(rmin, rmax, false);
}

// Converts a floating-point number to an integer. For all inputs x where
// static_cast<IntOut>(x) is legal according to the C++ standard, the result
// is identical to that cast (i.e. the result is x with its fractional part
// truncated whenever that is representable as IntOut).
//
// static_cast would cause undefined behavior for the following cases, which
// have well-defined behavior for this function:
//
// 1. If x is NaN, the result is zero.
//
// 2. If the truncated form of x is above the representable range of IntOut,
//    the result is std::numeric_limits<IntOut>::max().
//
// 3. If the truncated form of x is below the representable range of IntOut,
//    the result is std::numeric_limits<IntOut>::min().
//
// Note that cases #2 and #3 cover infinities as well as finite numbers.
//
// The range of FloatIn must include the range of IntOut, otherwise
// the results are undefined.
// TODO(sfeuz): Replace by absl::SafeCast once available.
template <class IntOut, class FloatIn>
IntOut SafeCast(FloatIn x) {
  static_assert(!std::numeric_limits<FloatIn>::is_integer,
                "FloatIn is integer");
  static_assert(std::numeric_limits<IntOut>::is_integer,
                "IntOut is not integer");
  static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");

  // Special case NaN, for which the logic below doesn't work.
  if (std::isnan(x)) {
    return 0;
  }

  // Negative values all clip to zero for unsigned results.
  if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
    return 0;
  }

  // Handle infinities.
  if (std::isinf(x)) {
    return x < 0 ? std::numeric_limits<IntOut>::min()
                 : std::numeric_limits<IntOut>::max();
  }

  // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
  // unless x is zero in which case exp == 0. Note that this implies that the
  // magnitude of x is strictly less than 2^exp.
  int exp = 0;
  std::frexp(x, &exp);

  // Let N be the number of non-sign bits in the representation of IntOut. If
  // the magnitude of x is strictly less than 2^N, the truncated version of x
  // is representable as IntOut. The only representable integer for which this
  // is not the case is kMin for signed types (i.e. -2^N), but that is covered
  // by the fall-through below.
  if (exp <= std::numeric_limits<IntOut>::digits) {
    return x;
  }

  // Handle numbers with magnitude >= 2^N.
  return x < 0 ? std::numeric_limits<IntOut>::min()
               : std::numeric_limits<IntOut>::max();
}

// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of NEGATIVE its exponent ---
// this is intended as a RIGHT-shift.
//
// Restricted to the case where the multiplier < 1 (and non-negative).
void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
                                         int32_t* quantized_multiplier,
                                         int* left_shift);

// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Restricted to the case where the multiplier > 1.
void QuantizeMultiplierGreaterThanOne(double double_multiplier,
                                      int32_t* quantized_multiplier,
                                      int* left_shift);

// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Handles an arbitrary positive multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                        int* shift);

// Splits a double input value into a returned fraction, and a shift value from
// the exponent, using only bitwise and integer operations to support
// microcontrollers and other environments without floating-point support.
//
// This is designed to be a replacement for how std::frexp() is used within the
// QuantizeMultiplier() function, and so has a different signature than the
// standard version, returning a 64-bit integer rather than a double. This
// result has a maximum value of 1<<31, with the fraction expressed as a
// proportion of that maximum.
//
// std::frexp() returns NaNs and infinities unmodified, but since we're
// returning integers that can't represent those values, instead we return
// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
// result of 0 for NaNs, std:numeric_limits<int64_t>::max() for +INFINITY, and
// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
// result in return values that end up truncating some bits at the end,
// reflecting the loss of precision inherent in denormalization.
int64_t IntegerFrExp(double input, int* shift);

// Converts an integer fraction in the format produced by IntegerFrExp (where
// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
// IEEE binary64 double format result. The implementation uses only integer and
// bitwise operators, so no floating point hardware support or emulation is
// needed. This is here so quantized operations can run non-time-critical
// preparation calculations on microcontrollers and other platforms without
// float support.
double DoubleFromFractionAndShift(int64_t fraction, int shift);

// Performs a multiplication of two numbers in double format, using only integer
// and bitwise instructions. This is aimed at supporting housekeeping functions
// for quantized operations on microcontrollers without floating-point hardware.
double IntegerDoubleMultiply(double a, double b);

// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
// greater than b. It is implemented using only integer and logical instructions
// so that it can be easily run on microcontrollers for quantized operations.
int IntegerDoubleCompare(double a, double b);

// This first creates a multiplier in a double equivalent of
// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
// precision in the double's fractional bits. It then splits the result into
// significand and exponent.
void PreprocessSoftmaxScaling(double beta, double input_scale,
                              int input_integer_bits,
                              int32_t* quantized_multiplier, int* left_shift);
// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
                                    int input_integer_bits,
                                    int32_t* quantized_multiplier,
                                    int* left_shift,
                                    int32_t* reverse_scaling_divisor,
                                    int* reverse_scaling_left_shift);
// Calculate the largest input that will result in a within-bounds intermediate
// result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words,
// it must not overflow before we reduce the value by multiplication by the
// input multiplier. The negative radius is used as the minimum difference in
// Softmax.
int CalculateInputRadius(int input_integer_bits, int input_left_shift,
                         int total_signed_bits = 31);

// Nudges a min/max quantization range to ensure zero is zero.
// Gymnastics with nudged zero point is to ensure that real zero maps to
// an integer, which is required for e.g. zero-padding in convolutional layers.
// Outputs nudged_min, nudged_max, nudged_scale.
void NudgeQuantizationRange(const float min, const float max,
                            const int quant_min, const int quant_max,
                            float* nudged_min, float* nudged_max,
                            float* nudged_scale);

// Fake quantizes (quantizes and dequantizes) input_data using the scale,
// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
                       const float nudged_max, const float* input_data,
                       float* output_data, const float size);

// If x is approximately a power of two (with any positive or negative
// exponent), stores that exponent (i.e. log2(x)) in *log2_result, otherwise
// returns false.
bool CheckedLog2(const float x, int* log2_result);

// Decomposes an array of double multipliers into a Q0.31 int32 representation
// of its significand, and shift representation of its exponent.
//
// Handles an arbitrary multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
                             int32_t* effective_scale_significand,
                             int* effective_shift);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
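An editorial worked example of ChooseQuantizationParams() from the header above (values illustrative): for the float range [-1.0, 2.0] quantized to uint8_t the scale is 3/255 and both zero-point candidates agree at 85, so real 0.0 is exactly representable.

const tflite::QuantizationParams qp =
    tflite::ChooseQuantizationParams<uint8_t>(-1.0, 2.0);
// qp.scale ~= 0.0117647 (= 3/255), qp.zero_point == 85;
// quantize as q = round(real / scale) + zero_point, so 0.0 -> 85 exactly.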
@@ -1,400 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_

#include <algorithm>
#include <type_traits>

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {

namespace reference_ops {

template <typename T>
inline void Add(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const T* input1_data,
                const RuntimeShape& input2_shape, const T* input2_data,
                const RuntimeShape& output_shape, T* output_data) {
  T activation_min, activation_max;
  GetActivationParams(params, &activation_min, &activation_max);

  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    output_data[i] = ActivationFunctionWithMinMax(
        input1_data[i] + input2_data[i], activation_min, activation_max);
  }
}

// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.

// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void AddElementwise(int size, const ArithmeticParams& params,
                           const T* input1_data, const T* input2_data,
                           T* output_data) {
  TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
  TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
  TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
  TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());

  for (int i = 0; i < size; ++i) {
    const int32_t input1_val = params.input1_offset + input1_data[i];
    const int32_t input2_val = params.input2_offset + input2_data[i];
    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
    const int32_t scaled_input1_val =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_input1_val, params.input1_multiplier, params.input1_shift);
    const int32_t scaled_input2_val =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_input2_val, params.input2_multiplier, params.input2_shift);
    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
    const int32_t raw_output =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            raw_sum, params.output_multiplier, params.output_shift) +
        params.output_offset;
    const int32_t clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, raw_output));
    output_data[i] = static_cast<T>(clamped_output);
  }
}

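In summary (editorial note; the concrete offsets, multipliers and shifts are computed at Prepare time in add.cc and are not shown here), each element in AddElementwise() goes through the following rescaling pipeline before the clamp:

// v1  = (input1[i] + input1_offset) << left_shift
// v2  = (input2[i] + input2_offset) << left_shift
// s1  = MultiplyByQuantizedMultiplierSmallerThanOneExp(v1, input1_multiplier, input1_shift)
// s2  = MultiplyByQuantizedMultiplierSmallerThanOneExp(v2, input2_multiplier, input2_shift)
// out = MultiplyByQuantizedMultiplierSmallerThanOneExp(s1 + s2, output_multiplier, output_shift)
//       + output_offset, clamped to [quantized_activation_min, quantized_activation_max]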
// Scalar-broadcast add that can be used for inner loop of more general
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
                               uint8_t input1_data, const uint8_t* input2_data,
                               uint8_t* output_data) {
  TFLITE_DCHECK_GT(params.input1_offset, -256);
  TFLITE_DCHECK_GT(params.input2_offset, -256);
  TFLITE_DCHECK_LT(params.input1_offset, 256);
  TFLITE_DCHECK_LT(params.input2_offset, 256);

  const int32_t input1_val = params.input1_offset + input1_data;
  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
  const int32_t scaled_input1_val =
      MultiplyByQuantizedMultiplierSmallerThanOneExp(
          shifted_input1_val, params.input1_multiplier, params.input1_shift);
  for (int i = 0; i < size; ++i) {
    const int32_t input2_val = params.input2_offset + input2_data[i];
    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
    const int32_t scaled_input2_val =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_input2_val, params.input2_multiplier, params.input2_shift);
    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
    const int32_t raw_output =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            raw_sum, params.output_multiplier, params.output_shift) +
        params.output_offset;
    const int32_t clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, raw_output));
    output_data[i] = static_cast<uint8_t>(clamped_output);
  }
}

inline void Add(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const uint8_t* input1_data,
                const RuntimeShape& input2_shape, const uint8_t* input2_data,
                const RuntimeShape& output_shape, uint8_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);

  TFLITE_DCHECK_GT(params.input1_offset, -256);
  TFLITE_DCHECK_GT(params.input2_offset, -256);
  TFLITE_DCHECK_LT(params.input1_offset, 256);
  TFLITE_DCHECK_LT(params.input2_offset, 256);
  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void AddGeneralParamScale(const ArithmeticParams& params,
                                 const RuntimeShape& input1_shape,
                                 const int16_t* input1_data,
                                 const RuntimeShape& input2_shape,
                                 const int16_t* input2_data,
                                 const RuntimeShape& output_shape,
                                 int16_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);

  int max_value = std::numeric_limits<int16_t>::max();

  TFLITE_DCHECK_GT(params.input1_offset, -max_value);
  TFLITE_DCHECK_GT(params.input2_offset, -max_value);
  TFLITE_DCHECK_LT(params.input1_offset, max_value);
  TFLITE_DCHECK_LT(params.input2_offset, max_value);
  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void Add(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const int16_t* input1_data,
                const RuntimeShape& input2_shape, const int16_t* input2_data,
                const RuntimeShape& output_shape, int16_t* output_data,
                bool pot_scale = true) {
  if (!pot_scale) {
    AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
                         input2_data, output_shape, output_data);
    return;
  }

  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);

  const int input1_shift = params.input1_shift;
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);
  const int16_t output_activation_min = params.quantized_activation_min;
  const int16_t output_activation_max = params.quantized_activation_max;

  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
  TFLITE_DCHECK_LE(input1_shift, 0);
  TFLITE_DCHECK_LE(params.input2_shift, 0);
  const int16_t* not_shift_input =
      input1_shift == 0 ? input1_data : input2_data;
  const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
  const int input_right_shift =
      input1_shift == 0 ? -params.input2_shift : -input1_shift;

  for (int i = 0; i < flat_size; i++) {
    // F0 uses 0 integer bits, range [-1, 1].
    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;

    F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
    F0 scaled_input = F0::FromRaw(
        gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
    F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
    const int16_t raw_output = result.raw();
    const int16_t clamped_output = std::min(
        output_activation_max, std::max(output_activation_min, raw_output));
    output_data[i] = clamped_output;
  }
}

template <typename T>
inline typename std::enable_if<!is_small_integer<T>::value, void>::type
BroadcastAdd4DSlow(const ArithmeticParams& params,
                   const RuntimeShape& input1_shape, const T* input1_data,
                   const RuntimeShape& input2_shape, const T* input2_data,
                   const RuntimeShape& output_shape, T* output_data) {
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                      &desc2);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);

  T activation_min, activation_max;
  GetActivationParams(params, &activation_min, &activation_max);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest stride,
  // typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for the
  // best cache behavior.
  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          output_data[Offset(extended_output_shape, b, y, x, c)] =
              ActivationFunctionWithMinMax<T>(
                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
                  activation_min, activation_max);
        }
      }
    }
  }
}

// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline typename std::enable_if<is_small_integer<T>::value, void>::type
BroadcastAdd4DSlow(const ArithmeticParams& params,
                   const RuntimeShape& input1_shape, const T* input1_data,
                   const RuntimeShape& input2_shape, const T* input2_data,
                   const RuntimeShape& output_shape, T* output_data) {
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                      &desc2);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest stride,
  // typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for the
  // best cache behavior.
  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          const int32_t input1_val =
              params.input1_offset +
              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
          const int32_t input2_val =
              params.input2_offset +
              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
          const int32_t shifted_input1_val =
              input1_val * (1 << params.left_shift);
          const int32_t shifted_input2_val =
              input2_val * (1 << params.left_shift);
          const int32_t scaled_input1_val =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  shifted_input1_val, params.input1_multiplier,
                  params.input1_shift);
          const int32_t scaled_input2_val =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  shifted_input2_val, params.input2_multiplier,
                  params.input2_shift);
          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
          const int32_t raw_output =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  raw_sum, params.output_multiplier, params.output_shift) +
              params.output_offset;
          const int32_t clamped_output =
              std::min(params.quantized_activation_max,
                       std::max(params.quantized_activation_min, raw_output));
          output_data[Offset(extended_output_shape, b, y, x, c)] =
              static_cast<T>(clamped_output);
        }
      }
    }
  }
}

inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
                                 const RuntimeShape& unswitched_input1_shape,
                                 const uint8_t* unswitched_input1_data,
                                 const RuntimeShape& unswitched_input2_shape,
                                 const uint8_t* unswitched_input2_data,
                                 const RuntimeShape& output_shape,
                                 uint8_t* output_data) {
  ArithmeticParams switched_params = unswitched_params;
  switched_params.input1_offset = unswitched_params.input2_offset;
  switched_params.input1_multiplier = unswitched_params.input2_multiplier;
  switched_params.input1_shift = unswitched_params.input2_shift;
  switched_params.input2_offset = unswitched_params.input1_offset;
  switched_params.input2_multiplier = unswitched_params.input1_multiplier;
  switched_params.input2_shift = unswitched_params.input1_shift;

  const bool use_unswitched =
      unswitched_params.broadcast_category ==
      tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;

  const ArithmeticParams& params =
      use_unswitched ? unswitched_params : switched_params;
  const uint8_t* input1_data =
      use_unswitched ? unswitched_input1_data : unswitched_input2_data;
  const uint8_t* input2_data =
      use_unswitched ? unswitched_input2_data : unswitched_input1_data;

  // Fivefold nested loops. The second input resets its position for each
  // iteration of the second loop. The first input resets its position at the
  // beginning of the fourth loop. The innermost loop is an elementwise add of
  // sections of the arrays.
  uint8_t* output_data_ptr = output_data;
  const uint8_t* input1_data_ptr = input1_data;
  const uint8_t* input2_data_reset = input2_data;
  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
  // between input shapes. y3 for input 1 is always broadcast, and so the
  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
  // Put another way,
  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
  int y0 = params.broadcast_shape[0];
  int y1 = params.broadcast_shape[1];
  int y2 = params.broadcast_shape[2];
  int y3 = params.broadcast_shape[3];
  int y4 = params.broadcast_shape[4];
  if (y4 > 1) {
    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
    // dimension.
    for (int i0 = 0; i0 < y0; ++i0) {
      const uint8_t* input2_data_ptr;
      for (int i1 = 0; i1 < y1; ++i1) {
        input2_data_ptr = input2_data_reset;
        for (int i2 = 0; i2 < y2; ++i2) {
          for (int i3 = 0; i3 < y3; ++i3) {
            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
                           output_data_ptr);
            input2_data_ptr += y4;
            output_data_ptr += y4;
          }
          // We have broadcast y4 of input1 data y3 times, and now move on.
          input1_data_ptr += y4;
        }
      }
      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
      input2_data_reset = input2_data_ptr;
    }
  } else {
    // Special case of y4 == 1, in which the innermost loop is a single element
    // and can be combined with the next (y3) as an inner broadcast.
    //
    // Note that this handles the case of pure scalar broadcast when
    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
    // broadcast with batch (as y2 > 1).
    //
    // NOTE The process is the same as the above general case except simplified
    // for y4 == 1 and the loop over y3 is contained within the
    // AddScalarBroadcast function.
    for (int i0 = 0; i0 < y0; ++i0) {
      const uint8_t* input2_data_ptr;
      for (int i1 = 0; i1 < y1; ++i1) {
        input2_data_ptr = input2_data_reset;
        for (int i2 = 0; i2 < y2; ++i2) {
          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
                             output_data_ptr);
          input2_data_ptr += y3;
          output_data_ptr += y3;
          input1_data_ptr += 1;
        }
      }
      input2_data_reset = input2_data_ptr;
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
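An editorial usage sketch for the float BroadcastAdd4DSlow() above (shapes and values illustrative): each size-1 dimension of the smaller input is simply reused along that axis.

const float input[6] = {1, 2, 3, 4, 5, 6};  // shape [1, 1, 2, 3]
const float bias[3] = {10, 20, 30};         // shape [1, 1, 1, 3]
float output[6];                            // shape [1, 1, 2, 3]
tflite::ArithmeticParams params = {};
params.float_activation_min = -std::numeric_limits<float>::max();
params.float_activation_max = std::numeric_limits<float>::max();
tflite::reference_ops::BroadcastAdd4DSlow(
    params, tflite::RuntimeShape({1, 1, 2, 3}), input,
    tflite::RuntimeShape({1, 1, 1, 3}), bias,
    tflite::RuntimeShape({1, 1, 2, 3}), output);
// output == {11, 22, 33, 14, 25, 36}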
@@ -1,86 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_

#include <algorithm>
#include <limits>

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_ops {

// T is expected to be either float or int.
template <typename T>
inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
                 const T* const* input_data, T* output_data) {
  // All inputs and output should have the same shape, this is checked during
  // Prepare stage.
  const size_t size = input_shape.FlatSize();
  for (size_t i = 0; i < size; ++i) {
    T x = 0;
    for (size_t j = 0; j < num_inputs; ++j) {
      x += input_data[j][i];
    }
    output_data[i] = x;
  }
}

inline void AddN(const ArithmeticParams& params,
                 const RuntimeShape& input_shape, const size_t num_inputs,
                 const int8_t* const* input_data, int8_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  // Input offset is negative input zero point. Activation tensors are
  // asymmetric quantized so they span the full int8 range.
  // All inputs should have same zero-point and scale, this is checked during
  // Prepare stage.
  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());

  // All inputs and output should have the same shape, this is checked during
  // Prepare stage.
  const size_t size = input_shape.FlatSize();
  for (size_t i = 0; i < size; ++i) {
    // accumulate in scaled_x before clamping to avoid overflow
    const int32_t x = params.input1_offset;  // x = 0
    const int32_t shifted_x = x * (1 << params.left_shift);
    int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp(
        shifted_x, params.input1_multiplier, params.input1_shift);

    for (size_t j = 0; j < num_inputs; ++j) {
      const int32_t y = params.input1_offset + input_data[j][i];
      const int32_t shifted_y = y * (1 << params.left_shift);
      int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
          shifted_y, params.input1_multiplier, params.input1_shift);
      scaled_x += scaled_y;
    }

    const int32_t raw_output =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            scaled_x, params.output_multiplier, params.output_shift) +
        params.output_offset;
    const int32_t clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, raw_output));
    output_data[i] = static_cast<int8_t>(clamped_output);
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
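// Illustrative sketch (not part of the removed file): the float AddN above
// reduces to a plain elementwise sum over N equally shaped buffers. The helper
// below mirrors that logic with raw arrays so the behaviour can be checked in
// isolation; the names and sizes are invented for the example.
#include <cstddef>

template <typename T>
void AddNReferenceSketch(std::size_t flat_size, std::size_t num_inputs,
                         const T* const* inputs, T* output) {
  for (std::size_t i = 0; i < flat_size; ++i) {
    T acc = 0;
    for (std::size_t j = 0; j < num_inputs; ++j) {
      acc += inputs[j][i];  // same accumulation order as the reference AddN
    }
    output[i] = acc;
  }
}
// Example: two inputs {1, 2, 3} and {4, 5, 6} give {5, 7, 9}.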
@@ -1,88 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_

#include <functional>

#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

template <typename T>
std::function<bool(T, T)> GetComparefunction(bool is_arg_max) {
  if (is_arg_max) {
    return std::greater<T>();
  } else {
    return std::less<T>();
  }
}

template <typename T1, typename T2, typename T3, typename Cmp>
void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
               const T3* input2_data, const RuntimeShape& output_shape,
               T2* output_data, const Cmp& cmp) {
  TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
  TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
                   output_shape.DimensionsCount());
  int axis = input2_data[0];
  if (axis < 0) {
    axis += input1_shape.DimensionsCount();
  }
  const int axis_size = input1_shape.Dims(axis);

  int outer_size = 1;
  for (int i = 0; i < axis; ++i) {
    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
    outer_size *= input1_shape.Dims(i);
  }

  int inner_size = 1;
  const int dims_count = input1_shape.DimensionsCount();
  for (int i = axis + 1; i < dims_count; ++i) {
    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
    inner_size *= input1_shape.Dims(i);
  }
  for (int outer = 0; outer < outer_size; ++outer) {
    for (int inner = 0; inner < inner_size; ++inner) {
      auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
      T2 min_max_index = 0;
      for (int i = 1; i < axis_size; ++i) {
        const auto& curr_value =
            input1_data[(outer * axis_size + i) * inner_size + inner];
        if (cmp(curr_value, min_max_value)) {
          min_max_value = curr_value;
          min_max_index = static_cast<T2>(i);
        }
      }
      output_data[outer * inner_size + inner] = min_max_index;
    }
  }
}

template <typename T1, typename T2, typename T3>
void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
               const T3* input2_data, const RuntimeShape& output_shape,
               T2* output_data, const bool is_arg_max) {
  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
            GetComparefunction<T1>(is_arg_max));
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
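// Illustrative sketch (not from the removed file): the ArgMinMax loop above is
// a comparator-driven scan along one axis. For a 1-D input the same idea
// collapses to the snippet below; the names are invented for the example.
#include <cstdint>
#include <functional>
#include <vector>

template <typename T, typename Cmp>
int32_t ArgScanSketch(const std::vector<T>& values, Cmp cmp) {
  int32_t best_index = 0;
  for (int32_t i = 1; i < static_cast<int32_t>(values.size()); ++i) {
    if (cmp(values[i], values[best_index])) {  // cmp = std::greater for argmax
      best_index = i;
    }
  }
  return best_index;
}
// ArgScanSketch<float>({3.f, 9.f, 1.f}, std::greater<float>()) returns 1.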
@@ -1,275 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
namespace batch_matmul {
|
|
||||||
|
|
||||||
// Determine which dimension is the broadcast dimension.
|
|
||||||
inline int broadcast_dim(int lhs_dim, int rhs_dim) {
|
|
||||||
if (lhs_dim == rhs_dim) return lhs_dim;
|
|
||||||
if (lhs_dim == 1) return rhs_dim;
|
|
||||||
TFLITE_DCHECK_EQ(rhs_dim, 1);
|
|
||||||
return lhs_dim;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute the "extent" for iterating on this dimension.
|
|
||||||
// If we are broadcasting, then don't advance (i.e. return 0).
|
|
||||||
inline int extent(const RuntimeShape& shape, int x) {
|
|
||||||
if (shape.Dims(x) == 1) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
int prod = 1;
|
|
||||||
for (int i = x + 1; i < shape.DimensionsCount(); ++i) {
|
|
||||||
prod *= shape.Dims(i);
|
|
||||||
}
|
|
||||||
return prod;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace batch_matmul
|
|
||||||
|
|
||||||
template <typename Ta, typename Tb, typename Tout>
|
|
||||||
inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data,
|
|
||||||
const RuntimeShape& rhs_shape, const Tb* rhs_data,
|
|
||||||
const RuntimeShape& output_shape, Tout* output_data) {
|
|
||||||
const RuntimeShape extended_lhs_shape =
|
|
||||||
RuntimeShape::ExtendedShape(5, lhs_shape);
|
|
||||||
const RuntimeShape extended_rhs_shape =
|
|
||||||
RuntimeShape::ExtendedShape(5, rhs_shape);
|
|
||||||
|
|
||||||
const int batch_dim0 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
|
|
||||||
const int batch_dim1 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
|
|
||||||
const int batch_dim2 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
|
|
||||||
|
|
||||||
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
|
|
||||||
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
|
|
||||||
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
|
|
||||||
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
|
|
||||||
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
|
|
||||||
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
|
|
||||||
|
|
||||||
// Set params for each matrix multiply.
|
|
||||||
const int lhs_rows = extended_lhs_shape.Dims(3);
|
|
||||||
const int rhs_cols = extended_rhs_shape.Dims(4);
|
|
||||||
const int accum_depth = extended_lhs_shape.Dims(4);
|
|
||||||
|
|
||||||
for (int b0 = 0; b0 < batch_dim0; ++b0) {
|
|
||||||
const Ta* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
|
|
||||||
const Tb* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
|
|
||||||
for (int b1 = 0; b1 < batch_dim1; ++b1) {
|
|
||||||
const Ta* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
|
|
||||||
const Tb* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
|
|
||||||
for (int b2 = 0; b2 < batch_dim2; ++b2) {
|
|
||||||
const Ta* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
|
|
||||||
const Tb* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
|
|
||||||
Tout* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
|
|
||||||
b1 * batch_dim2 + b2) *
|
|
||||||
lhs_rows * rhs_cols;
|
|
||||||
for (int j = 0; j < rhs_cols; ++j) {
|
|
||||||
for (int i = 0; i < lhs_rows; ++i) {
|
|
||||||
Tout total = 0;
|
|
||||||
for (int k = 0; k < accum_depth; ++k) {
|
|
||||||
total += static_cast<Tout>(lhs_ptr2[accum_depth * i + k]) *
|
|
||||||
static_cast<Tout>(rhs_ptr2[j * accum_depth + k]);
|
|
||||||
}
|
|
||||||
int idx = lhs_rows * j + i;
|
|
||||||
out_ptr[idx] = total;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data,
|
|
||||||
const RuntimeShape& rhs_shape, const int8_t* rhs_data,
|
|
||||||
const float* scaling_factors,
|
|
||||||
const int32_t* input_offset, int32_t* row_sums,
|
|
||||||
const RuntimeShape& output_shape, float* output_data,
|
|
||||||
bool* compute_row_sums) {
|
|
||||||
const RuntimeShape extended_lhs_shape =
|
|
||||||
RuntimeShape::ExtendedShape(5, lhs_shape);
|
|
||||||
const RuntimeShape extended_rhs_shape =
|
|
||||||
RuntimeShape::ExtendedShape(5, rhs_shape);
|
|
||||||
|
|
||||||
const int batch_dim0 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
|
|
||||||
const int batch_dim1 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
|
|
||||||
const int batch_dim2 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
|
|
||||||
|
|
||||||
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
|
|
||||||
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
|
|
||||||
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
|
|
||||||
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
|
|
||||||
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
|
|
||||||
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
|
|
||||||
|
|
||||||
// Set params for each matrix multiply.
|
|
||||||
const int lhs_rows = extended_lhs_shape.Dims(3);
|
|
||||||
const int rhs_cols = extended_rhs_shape.Dims(4);
|
|
||||||
const int accum_depth = extended_lhs_shape.Dims(4);
|
|
||||||
|
|
||||||
const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols;
|
|
||||||
const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols;
|
|
||||||
const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols;
|
|
||||||
const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows;
|
|
||||||
const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows;
|
|
||||||
const int woff_ext2 = lhs_ext2 == 0 ? 0 : lhs_rows;
|
|
||||||
|
|
||||||
if (!compute_row_sums || *compute_row_sums) {
|
|
||||||
int num_weights_matrices = 1;
|
|
||||||
for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) {
|
|
||||||
num_weights_matrices *= extended_lhs_shape.Dims(i);
|
|
||||||
}
|
|
||||||
tensor_utils::ReductionSumVector(
|
|
||||||
lhs_data, row_sums, num_weights_matrices * lhs_rows, accum_depth);
|
|
||||||
if (compute_row_sums) {
|
|
||||||
*compute_row_sums = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int b0 = 0; b0 < batch_dim0; ++b0) {
|
|
||||||
const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
|
|
||||||
const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
|
|
||||||
const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0);
|
|
||||||
const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0);
|
|
||||||
const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0);
|
|
||||||
for (int b1 = 0; b1 < batch_dim1; ++b1) {
|
|
||||||
const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
|
|
||||||
const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
|
|
||||||
const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1);
|
|
||||||
const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1);
|
|
||||||
const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1);
|
|
||||||
for (int b2 = 0; b2 < batch_dim2; ++b2) {
|
|
||||||
const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
|
|
||||||
const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
|
|
||||||
const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2);
|
|
||||||
const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2);
|
|
||||||
const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2);
|
|
||||||
float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
|
|
||||||
b1 * batch_dim2 + b2) *
|
|
||||||
lhs_rows * rhs_cols;
|
|
||||||
for (int j = 0; j < rhs_cols; ++j) {
|
|
||||||
const float batch_scaling_factor = scale_ptr2[j];
|
|
||||||
const float batch_offset = static_cast<float>(ioff_ptr2[j]);
|
|
||||||
for (int i = 0; i < lhs_rows; ++i) {
|
|
||||||
int32_t total = 0;
|
|
||||||
for (int k = 0; k < accum_depth; ++k) {
|
|
||||||
total +=
|
|
||||||
lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k];
|
|
||||||
}
|
|
||||||
int32_t row_sum = woff_ptr2[i];
|
|
||||||
total -= row_sum * batch_offset;
|
|
||||||
int idx = lhs_rows * j + i;
|
|
||||||
out_ptr[idx] += batch_scaling_factor * total;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, typename AccumT>
|
|
||||||
inline void BatchMatMul(const FullyConnectedParams& params,
|
|
||||||
const RuntimeShape& lhs_shape, const T* lhs_data,
|
|
||||||
const RuntimeShape& rhs_shape, const T* rhs_data,
|
|
||||||
const RuntimeShape& output_shape, T* output_data) {
|
|
||||||
const RuntimeShape extended_lhs_shape =
|
|
||||||
RuntimeShape::ExtendedShape(5, lhs_shape);
|
|
||||||
const RuntimeShape extended_rhs_shape =
|
|
||||||
RuntimeShape::ExtendedShape(5, rhs_shape);
|
|
||||||
|
|
||||||
const int batch_dim0 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
|
|
||||||
const int batch_dim1 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
|
|
||||||
const int batch_dim2 = batch_matmul::broadcast_dim(
|
|
||||||
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
|
|
||||||
|
|
||||||
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
|
|
||||||
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
|
|
||||||
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
|
|
||||||
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
|
|
||||||
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
|
|
||||||
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
|
|
||||||
|
|
||||||
// Set params for each matrix multiply.
|
|
||||||
const int lhs_rows = extended_lhs_shape.Dims(3);
|
|
||||||
const int rhs_cols = extended_rhs_shape.Dims(4);
|
|
||||||
const int accum_depth = extended_lhs_shape.Dims(4);
|
|
||||||
|
|
||||||
const int32_t input_offset = params.input_offset;
|
|
||||||
const int32_t filter_offset = params.weights_offset;
|
|
||||||
const int32_t output_offset = params.output_offset;
|
|
||||||
const int32_t output_multiplier = params.output_multiplier;
|
|
||||||
const int output_shift = params.output_shift;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
|
|
||||||
for (int b0 = 0; b0 < batch_dim0; ++b0) {
|
|
||||||
const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
|
|
||||||
const T* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
|
|
||||||
for (int b1 = 0; b1 < batch_dim1; ++b1) {
|
|
||||||
const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
|
|
||||||
const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
|
|
||||||
for (int b2 = 0; b2 < batch_dim2; ++b2) {
|
|
||||||
const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
|
|
||||||
const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
|
|
||||||
T* out_ptr = output_data +
|
|
||||||
((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) *
|
|
||||||
lhs_rows * rhs_cols;
|
|
||||||
|
|
||||||
for (int j = 0; j < rhs_cols; ++j) {
|
|
||||||
for (int i = 0; i < lhs_rows; ++i) {
|
|
||||||
AccumT total = 0;
|
|
||||||
for (int k = 0; k < accum_depth; ++k) {
|
|
||||||
AccumT lhs_val = lhs_ptr2[accum_depth * i + k];
|
|
||||||
AccumT rhs_val = rhs_ptr2[accum_depth * j + k];
|
|
||||||
total += (lhs_val + filter_offset) * (rhs_val + input_offset);
|
|
||||||
}
|
|
||||||
int32_t total_scaled = MultiplyByQuantizedMultiplier(
|
|
||||||
total, output_multiplier, output_shift);
|
|
||||||
total_scaled += output_offset;
|
|
||||||
total_scaled = std::max(total_scaled, output_activation_min);
|
|
||||||
total_scaled = std::min(total_scaled, output_activation_max);
|
|
||||||
const int idx = lhs_rows * j + i;
|
|
||||||
out_ptr[idx] = static_cast<T>(total_scaled);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
|
|
||||||
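// Illustrative sketch (not part of the diff): the innermost loops of the
// reference BatchMatMul above compute, for one batch, out[j][i] =
// sum_k lhs[i][k] * rhs[j][k], i.e. the right-hand operand is stored with the
// accumulation dimension contiguous. A minimal standalone version of that
// inner kernel, with invented names:
void MatMulInnerSketch(const float* lhs, const float* rhs, float* out,
                       int lhs_rows, int rhs_cols, int accum_depth) {
  for (int j = 0; j < rhs_cols; ++j) {
    for (int i = 0; i < lhs_rows; ++i) {
      float total = 0.f;
      for (int k = 0; k < accum_depth; ++k) {
        total += lhs[accum_depth * i + k] * rhs[j * accum_depth + k];
      }
      out[lhs_rows * j + i] = total;  // same output indexing as the reference
    }
  }
}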
@@ -1,101 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
// TODO(b/135760455): Move this method to an anonymous namespace in a cc file.
|
|
||||||
inline RuntimeShape ExtendShapeBatchToSpace(const RuntimeShape& shape) {
|
|
||||||
if (shape.DimensionsCount() == 4) {
|
|
||||||
return shape;
|
|
||||||
}
|
|
||||||
RuntimeShape new_shape(4, 1);
|
|
||||||
new_shape.SetDim(0, shape.Dims(0));
|
|
||||||
new_shape.SetDim(1, shape.Dims(1));
|
|
||||||
new_shape.SetDim(3, shape.Dims(2));
|
|
||||||
return new_shape;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline void BatchToSpaceND(const RuntimeShape& unextended_input1_shape,
|
|
||||||
const T* input1_data,
|
|
||||||
const RuntimeShape& unextended_input2_shape,
|
|
||||||
const int32_t* block_shape_data,
|
|
||||||
const RuntimeShape& unextended_input3_shape,
|
|
||||||
const int32_t* crops_data,
|
|
||||||
const RuntimeShape& unextended_output_shape,
|
|
||||||
T* output_data) {
|
|
||||||
ruy::profiler::ScopeLabel label("BatchToSpaceND");
|
|
||||||
TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3);
|
|
||||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(),
|
|
||||||
unextended_output_shape.DimensionsCount());
|
|
||||||
|
|
||||||
const RuntimeShape input1_shape =
|
|
||||||
ExtendShapeBatchToSpace(unextended_input1_shape);
|
|
||||||
const RuntimeShape output_shape =
|
|
||||||
ExtendShapeBatchToSpace(unextended_output_shape);
|
|
||||||
|
|
||||||
const int output_width = output_shape.Dims(2);
|
|
||||||
const int output_height = output_shape.Dims(1);
|
|
||||||
const int output_batch_size = output_shape.Dims(0);
|
|
||||||
|
|
||||||
const int depth = input1_shape.Dims(3);
|
|
||||||
const int input_width = input1_shape.Dims(2);
|
|
||||||
const int input_height = input1_shape.Dims(1);
|
|
||||||
const int input_batch_size = input1_shape.Dims(0);
|
|
||||||
|
|
||||||
const int block_shape_height = block_shape_data[0];
|
|
||||||
const int block_shape_width =
|
|
||||||
unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1;
|
|
||||||
const int crops_top = crops_data[0];
|
|
||||||
const int crops_left =
|
|
||||||
unextended_input1_shape.DimensionsCount() == 4 ? crops_data[2] : 0;
|
|
||||||
for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) {
|
|
||||||
const int out_batch = in_batch % output_batch_size;
|
|
||||||
const int spatial_offset = in_batch / output_batch_size;
|
|
||||||
for (int in_h = 0; in_h < input_height; ++in_h) {
|
|
||||||
const int out_h = in_h * block_shape_height +
|
|
||||||
spatial_offset / block_shape_width - crops_top;
|
|
||||||
if (out_h < 0 || out_h >= output_height) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
for (int in_w = 0; in_w < input_width; ++in_w) {
|
|
||||||
const int out_w = in_w * block_shape_width +
|
|
||||||
spatial_offset % block_shape_width - crops_left;
|
|
||||||
|
|
||||||
if (out_w < 0 || out_w >= output_width) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
T* out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
|
|
||||||
const T* in =
|
|
||||||
input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0);
|
|
||||||
memcpy(out, in, depth * sizeof(T));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
|
|
||||||
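// Illustrative sketch (not from the removed file): BatchToSpaceND above maps
// each input element to an output position by folding the extra batch factor
// back into the spatial dimensions. The coordinate mapping in isolation, with
// invented names:
struct BatchToSpaceCoordSketch {
  int out_batch;
  int out_h;
  int out_w;
};

BatchToSpaceCoordSketch MapCoordSketch(int in_batch, int in_h, int in_w,
                                       int output_batch_size, int block_h,
                                       int block_w, int crop_top,
                                       int crop_left) {
  const int spatial_offset = in_batch / output_batch_size;
  BatchToSpaceCoordSketch c;
  c.out_batch = in_batch % output_batch_size;
  c.out_h = in_h * block_h + spatial_offset / block_w - crop_top;
  c.out_w = in_w * block_w + spatial_offset % block_w - crop_left;
  return c;  // positions falling outside the output are skipped (cropped)
}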
@@ -1,91 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
// Also appears to duplicate MinimumMaximum.
|
|
||||||
//
|
|
||||||
// R: Result type. T1: Input 1 type. T2: Input 2 type.
|
|
||||||
template <typename R, typename T1, typename T2>
|
|
||||||
inline void BroadcastBinaryFunction4DSlow(
|
|
||||||
const RuntimeShape& unextended_input1_shape, const T1* input1_data,
|
|
||||||
const RuntimeShape& unextended_input2_shape, const T2* input2_data,
|
|
||||||
const RuntimeShape& unextended_output_shape, R* output_data,
|
|
||||||
R (*func)(T1, T2)) {
|
|
||||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
|
||||||
const RuntimeShape output_shape =
|
|
||||||
RuntimeShape::ExtendedShape(4, unextended_output_shape);
|
|
||||||
|
|
||||||
NdArrayDesc<4> desc1;
|
|
||||||
NdArrayDesc<4> desc2;
|
|
||||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
|
||||||
unextended_input2_shape, &desc1, &desc2);
|
|
||||||
|
|
||||||
const int* dims_data =
|
|
||||||
reinterpret_cast<const int*>(output_shape.DimsDataUpTo5D());
|
|
||||||
for (int b = 0; b < output_shape.Dims(0); ++b) {
|
|
||||||
int out_idx_b = b * dims_data[1];
|
|
||||||
int in_idx1_b = desc1.strides[0] * b;
|
|
||||||
int in_idx2_b = desc2.strides[0] * b;
|
|
||||||
for (int y = 0; y < output_shape.Dims(1); ++y) {
|
|
||||||
int out_idx_y = (out_idx_b + y) * dims_data[2];
|
|
||||||
int in_idx1_y = in_idx1_b + desc1.strides[1] * y;
|
|
||||||
int in_idx2_y = in_idx2_b + desc2.strides[1] * y;
|
|
||||||
for (int x = 0; x < output_shape.Dims(2); ++x) {
|
|
||||||
int out_idx_x = (out_idx_y + x) * dims_data[3];
|
|
||||||
int in1_idx = in_idx1_y + desc1.strides[2] * x;
|
|
||||||
int in2_idx = in_idx2_y + desc2.strides[2] * x;
|
|
||||||
for (int c = 0; c < output_shape.Dims(3); ++c) {
|
|
||||||
auto out_idx = out_idx_x + c;
|
|
||||||
auto in1_val = input1_data[in1_idx];
|
|
||||||
auto in2_val = input2_data[in2_idx];
|
|
||||||
output_data[out_idx] = func(in1_val, in2_val);
|
|
||||||
in1_idx += desc1.strides[3];
|
|
||||||
in2_idx += desc2.strides[3];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// R: Result type. T1: Input 1 type. T2: Input 2 type.
|
|
||||||
template <typename R, typename T1, typename T2>
|
|
||||||
inline void BinaryFunction(const RuntimeShape& input1_shape,
|
|
||||||
const T1* input1_data,
|
|
||||||
const RuntimeShape& input2_shape,
|
|
||||||
const T2* input2_data,
|
|
||||||
const RuntimeShape& output_shape, R* output_data,
|
|
||||||
R (*func)(T1, T2)) {
|
|
||||||
const int flat_size =
|
|
||||||
MatchingFlatSize(input1_shape, input2_shape, output_shape);
|
|
||||||
for (int i = 0; i < flat_size; ++i) {
|
|
||||||
output_data[i] = func(input1_data[i], input2_data[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
|
|
||||||
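// Illustrative sketch (not part of the removed file): BinaryFunction above is
// an elementwise map over two same-sized buffers through a function pointer.
// A standalone equivalent with invented names:
template <typename R, typename T1, typename T2>
void ElementwiseBinarySketch(int flat_size, const T1* in1, const T2* in2,
                             R* out, R (*func)(T1, T2)) {
  for (int i = 0; i < flat_size; ++i) {
    out[i] = func(in1[i], in2[i]);
  }
}
// e.g. passing a plain function such as
//   float SubSketch(float a, float b) { return a - b; }
// yields elementwise subtraction over the two inputs.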
@@ -1,56 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_

#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

template <typename T>
void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data,
                   const RuntimeShape& input2_shape, const T* input2_data,
                   const RuntimeShape& output_shape, T* output_data) {
  // Gets data at the backward index i of the shape tensor. Returns 1 if the
  // index is out of range.
  auto get_shape_data = [](const RuntimeShape& shape, const T* data,
                           int backward_idx) -> T {
    int forward_idx = shape.FlatSize() - 1 - backward_idx;
    if (forward_idx < 0) return 1;
    return data[forward_idx];
  };

  int output_num_elements = output_shape.FlatSize();
  for (int i = 0; i < output_num_elements; ++i) {
    int backward_i = output_num_elements - 1 - i;
    int shape1_i = get_shape_data(input1_shape, input1_data, i);
    int shape2_i = get_shape_data(input2_shape, input2_data, i);
    if (shape1_i == 1) {
      output_data[backward_i] = shape2_i;
    } else if (shape2_i == 1) {
      output_data[backward_i] = shape1_i;
    } else {
      TFLITE_CHECK_EQ(shape1_i, shape2_i);
      output_data[backward_i] = shape1_i;
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
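// Illustrative sketch (not from the removed file): BroadcastArgs above walks
// both shape tensors from the last dimension backwards, taking the non-1
// entry and requiring equality otherwise. The same rule on std::vector, with
// invented names:
#include <algorithm>
#include <cassert>
#include <vector>

std::vector<int> BroadcastShapeSketch(const std::vector<int>& a,
                                      const std::vector<int>& b) {
  const size_t out_dims = std::max(a.size(), b.size());
  std::vector<int> out(out_dims, 1);
  for (size_t i = 0; i < out_dims; ++i) {
    const int da = i < a.size() ? a[a.size() - 1 - i] : 1;
    const int db = i < b.size() ? b[b.size() - 1 - i] : 1;
    assert(da == db || da == 1 || db == 1);  // otherwise not broadcastable
    out[out_dims - 1 - i] = (da == 1) ? db : da;
  }
  return out;
}
// BroadcastShapeSketch({8, 1, 3}, {4, 3}) returns {8, 4, 3}.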
@@ -1,97 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
template <int N>
|
|
||||||
void BroadcastImpl(const NdArrayDesc<N>& input_desc, const char* input_data,
|
|
||||||
const NdArrayDesc<N>& output_desc, char* output_data,
|
|
||||||
int indexes[N], int dim, const int last_broadcasting_dim,
|
|
||||||
const int type_size) {
|
|
||||||
// Copy data from input to output.
|
|
||||||
if (dim == last_broadcasting_dim) {
|
|
||||||
int copy_size = output_desc.strides[dim] * type_size;
|
|
||||||
const char* data_src =
|
|
||||||
input_data + SubscriptToIndex(input_desc, indexes) * type_size;
|
|
||||||
char* data_dst =
|
|
||||||
output_data + SubscriptToIndex(output_desc, indexes) * type_size;
|
|
||||||
for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
|
|
||||||
memcpy(data_dst, data_src, copy_size);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recursive call to find the next broadcasting.
|
|
||||||
for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim];
|
|
||||||
++indexes[dim]) {
|
|
||||||
BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes,
|
|
||||||
dim + 1, last_broadcasting_dim, type_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Duplicate data in output tensor.
|
|
||||||
indexes[dim] = 0;
|
|
||||||
if (input_desc.extents[dim] != output_desc.extents[dim]) {
|
|
||||||
int copy_size = output_desc.strides[dim] * type_size;
|
|
||||||
char* data_src =
|
|
||||||
output_data + SubscriptToIndex(output_desc, indexes) * type_size;
|
|
||||||
char* data_dst = data_src + copy_size;
|
|
||||||
for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
|
|
||||||
memcpy(data_dst, data_src, copy_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <int N>
|
|
||||||
inline void BroadcastTo(const RuntimeShape& unextended_input_shape,
|
|
||||||
const char* input_data,
|
|
||||||
const RuntimeShape& unextended_output_shape,
|
|
||||||
char* output_data, TfLiteType data_type) {
|
|
||||||
NdArrayDesc<N> input_desc;
|
|
||||||
NdArrayDesc<N> output_desc;
|
|
||||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape),
|
|
||||||
&input_desc);
|
|
||||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
|
|
||||||
&output_desc);
|
|
||||||
|
|
||||||
// Get the last dimension that has broadcasting. At this dimension, the data is
|
|
||||||
// copied from input tensor to output tensor.
|
|
||||||
int last_broadcast_dim = -1;
|
|
||||||
for (int i = N - 1; i >= 0; --i) {
|
|
||||||
if (input_desc.extents[i] != output_desc.extents[i]) {
|
|
||||||
last_broadcast_dim = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If non-broadcasting, just copy data from input to output tensor.
|
|
||||||
if (last_broadcast_dim == -1) {
|
|
||||||
memcpy(output_data, input_data,
|
|
||||||
unextended_input_shape.FlatSize() * TfLiteTypeGetSize(data_type));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Broadcasting using memcpy.
|
|
||||||
int indexes[N] = {0};
|
|
||||||
BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes, 0,
|
|
||||||
last_broadcast_dim, TfLiteTypeGetSize(data_type));
|
|
||||||
}
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
|
|
||||||
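// Illustrative sketch (not part of the diff): at the last broadcasting
// dimension, BroadcastTo above copies one block from the input and then
// duplicates it in place until the output extent is filled. That duplication
// step in isolation, with invented names:
#include <cstring>

void DuplicateBlockSketch(char* output, int block_bytes, int repeat_count) {
  // output already holds one block of block_bytes at its start.
  char* dst = output + block_bytes;
  for (int i = 1; i < repeat_count; ++i, dst += block_bytes) {
    std::memcpy(dst, output, block_bytes);
  }
}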
@@ -1,37 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_

#include <cmath>

#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
                 const RuntimeShape& output_shape, float* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; ++i) {
    output_data[i] = std::ceil(input_data[i]);
  }
}

}  // namespace reference_ops
}  // namespace tflite
#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
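// Illustrative note (not part of the removed file): Ceil above is a pure
// elementwise rounding-up pass over a float buffer, e.g.
//   float in[3] = {0.2f, -1.7f, 3.f};  // -> {1.f, -1.f, 3.f} after Ceil
// so a scratch re-implementation only needs std::ceil from <cmath>.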
@@ -1,280 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
|
|
||||||
|
|
||||||
#include "tensorflow/lite/c/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline bool EqualFn(T lhs, T rhs) {
|
|
||||||
return lhs == rhs;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline bool NotEqualFn(T lhs, T rhs) {
|
|
||||||
return lhs != rhs;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline bool GreaterFn(T lhs, T rhs) {
|
|
||||||
return lhs > rhs;
|
|
||||||
}
|
|
||||||
template <typename T>
|
|
||||||
inline bool GreaterEqualFn(T lhs, T rhs) {
|
|
||||||
return lhs >= rhs;
|
|
||||||
}
|
|
||||||
template <typename T>
|
|
||||||
inline bool LessFn(T lhs, T rhs) {
|
|
||||||
return lhs < rhs;
|
|
||||||
}
|
|
||||||
template <typename T>
|
|
||||||
inline bool LessEqualFn(T lhs, T rhs) {
|
|
||||||
return lhs <= rhs;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
using ComparisonFn = bool (*)(T, T);
|
|
||||||
|
|
||||||
template <typename T, ComparisonFn<T> F>
|
|
||||||
inline void ComparisonImpl(
|
|
||||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
|
|
||||||
const T* input1_data, const RuntimeShape& input2_shape,
|
|
||||||
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
|
|
||||||
const int64_t flatsize =
|
|
||||||
MatchingFlatSize(input1_shape, input2_shape, output_shape);
|
|
||||||
for (int64_t i = 0; i < flatsize; ++i) {
|
|
||||||
output_data[i] = F(input1_data[i], input2_data[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <ComparisonFn<float> F>
|
|
||||||
inline void Comparison(const ComparisonParams& op_params,
|
|
||||||
const RuntimeShape& input1_shape,
|
|
||||||
const float* input1_data,
|
|
||||||
const RuntimeShape& input2_shape,
|
|
||||||
const float* input2_data,
|
|
||||||
const RuntimeShape& output_shape, bool* output_data) {
|
|
||||||
ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
|
|
||||||
input2_data, output_shape, output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, ComparisonFn<int32_t> F>
|
|
||||||
inline void ComparisonWithScaling(
|
|
||||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
|
|
||||||
const T* input1_data, const RuntimeShape& input2_shape,
|
|
||||||
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
|
|
||||||
int left_shift = op_params.left_shift;
|
|
||||||
int32_t input1_offset = op_params.input1_offset;
|
|
||||||
int32_t input1_multiplier = op_params.input1_multiplier;
|
|
||||||
int input1_shift = op_params.input1_shift;
|
|
||||||
int32_t input2_offset = op_params.input2_offset;
|
|
||||||
int32_t input2_multiplier = op_params.input2_multiplier;
|
|
||||||
int input2_shift = op_params.input2_shift;
|
|
||||||
|
|
||||||
const int64_t flatsize =
|
|
||||||
MatchingFlatSize(input1_shape, input2_shape, output_shape);
|
|
||||||
for (int64_t i = 0; i < flatsize; ++i) {
|
|
||||||
const int32_t input1_val = input1_offset + input1_data[i];
|
|
||||||
const int32_t input2_val = input2_offset + input2_data[i];
|
|
||||||
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
|
|
||||||
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
|
|
||||||
const int32_t scaled_input1_val =
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
shifted_input1_val, input1_multiplier, input1_shift);
|
|
||||||
const int32_t scaled_input2_val =
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
shifted_input2_val, input2_multiplier, input2_shift);
|
|
||||||
output_data[i] = F(scaled_input1_val, scaled_input2_val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct BroadcastComparison4DSlowCommon {
|
|
||||||
const RuntimeShape output_shape;
|
|
||||||
NdArrayDesc<4> desc1;
|
|
||||||
NdArrayDesc<4> desc2;
|
|
||||||
};
|
|
||||||
|
|
||||||
inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
|
|
||||||
const RuntimeShape& unextended_input1_shape,
|
|
||||||
const RuntimeShape& unextended_input2_shape,
|
|
||||||
const RuntimeShape& unextended_output_shape) {
|
|
||||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
|
||||||
NdArrayDesc<4> desc1;
|
|
||||||
NdArrayDesc<4> desc2;
|
|
||||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
|
||||||
unextended_input2_shape, &desc1, &desc2);
|
|
||||||
return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1,
|
|
||||||
desc2};
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, ComparisonFn<T> F>
|
|
||||||
inline void BroadcastComparison4DSlowImpl(
|
|
||||||
const ComparisonParams& op_params,
|
|
||||||
const RuntimeShape& unextended_input1_shape, const T* input1_data,
|
|
||||||
const RuntimeShape& unextended_input2_shape, const T* input2_data,
|
|
||||||
const RuntimeShape& unextended_output_shape, bool* output_data) {
|
|
||||||
const BroadcastComparison4DSlowCommon dims =
|
|
||||||
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
|
|
||||||
unextended_input2_shape,
|
|
||||||
unextended_output_shape);
|
|
||||||
|
|
||||||
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
|
|
||||||
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
|
|
||||||
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
|
|
||||||
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
|
|
||||||
output_data[Offset(dims.output_shape, b, y, x, c)] =
|
|
||||||
F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
|
|
||||||
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <ComparisonFn<float> F>
|
|
||||||
inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
|
|
||||||
const RuntimeShape& input1_shape,
|
|
||||||
const float* input1_data,
|
|
||||||
const RuntimeShape& input2_shape,
|
|
||||||
const float* input2_data,
|
|
||||||
const RuntimeShape& output_shape,
|
|
||||||
bool* output_data) {
|
|
||||||
BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
|
|
||||||
input2_shape, input2_data,
|
|
||||||
output_shape, output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, ComparisonFn<int32_t> F>
|
|
||||||
inline void BroadcastComparison4DSlowWithScaling(
|
|
||||||
const ComparisonParams& op_params,
|
|
||||||
const RuntimeShape& unextended_input1_shape, const T* input1_data,
|
|
||||||
const RuntimeShape& unextended_input2_shape, const T* input2_data,
|
|
||||||
const RuntimeShape& unextended_output_shape, bool* output_data) {
|
|
||||||
const BroadcastComparison4DSlowCommon dims =
|
|
||||||
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
|
|
||||||
unextended_input2_shape,
|
|
||||||
unextended_output_shape);
|
|
||||||
|
|
||||||
int left_shift = op_params.left_shift;
|
|
||||||
int32_t input1_offset = op_params.input1_offset;
|
|
||||||
int32_t input1_multiplier = op_params.input1_multiplier;
|
|
||||||
int input1_shift = op_params.input1_shift;
|
|
||||||
int32_t input2_offset = op_params.input2_offset;
|
|
||||||
int32_t input2_multiplier = op_params.input2_multiplier;
|
|
||||||
int input2_shift = op_params.input2_shift;
|
|
||||||
|
|
||||||
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
|
|
||||||
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
|
|
||||||
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
|
|
||||||
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
|
|
||||||
const int32_t input1_val =
|
|
||||||
input1_offset +
|
|
||||||
input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
|
|
||||||
const int32_t input2_val =
|
|
||||||
input2_offset +
|
|
||||||
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
|
|
||||||
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
|
|
||||||
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
|
|
||||||
const int32_t scaled_input1_val =
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
shifted_input1_val, input1_multiplier, input1_shift);
|
|
||||||
const int32_t scaled_input2_val =
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
shifted_input2_val, input2_multiplier, input2_shift);
|
|
||||||
output_data[Offset(dims.output_shape, b, y, x, c)] =
|
|
||||||
F(scaled_input1_val, scaled_input2_val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#define TFLITE_COMPARISON_OP(name) \
|
|
||||||
inline void name(const ComparisonParams& op_params, \
|
|
||||||
const RuntimeShape& input1_shape, const float* input1_data, \
|
|
||||||
const RuntimeShape& input2_shape, const float* input2_data, \
|
|
||||||
const RuntimeShape& output_shape, bool* output_data) { \
|
|
||||||
Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape, \
|
|
||||||
input2_data, output_shape, output_data); \
|
|
||||||
} \
|
|
||||||
template <typename T> \
|
|
||||||
inline void name##NoScaling( \
|
|
||||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
|
||||||
const T* input1_data, const RuntimeShape& input2_shape, \
|
|
||||||
const T* input2_data, const RuntimeShape& output_shape, \
|
|
||||||
bool* output_data) { \
|
|
||||||
ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data, \
|
|
||||||
input2_shape, input2_data, output_shape, \
|
|
||||||
output_data); \
|
|
||||||
} \
|
|
||||||
template <typename T> \
|
|
||||||
inline void name##WithScaling( \
|
|
||||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
|
||||||
const T* input1_data, const RuntimeShape& input2_shape, \
|
|
||||||
const T* input2_data, const RuntimeShape& output_shape, \
|
|
||||||
bool* output_data) { \
|
|
||||||
ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data, \
|
|
||||||
input2_shape, input2_data, \
|
|
||||||
output_shape, output_data); \
|
|
||||||
} \
|
|
||||||
template <typename T> \
|
|
||||||
inline void Broadcast4DSlow##name##NoScaling( \
|
|
||||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
|
||||||
const T* input1_data, const RuntimeShape& input2_shape, \
|
|
||||||
const T* input2_data, const RuntimeShape& output_shape, \
|
|
||||||
bool* output_data) { \
|
|
||||||
BroadcastComparison4DSlowImpl<T, name##Fn>( \
|
|
||||||
op_params, input1_shape, input1_data, input2_shape, input2_data, \
|
|
||||||
output_shape, output_data); \
|
|
||||||
} \
|
|
||||||
inline void Broadcast4DSlow##name( \
|
|
||||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
|
||||||
const float* input1_data, const RuntimeShape& input2_shape, \
|
|
||||||
const float* input2_data, const RuntimeShape& output_shape, \
|
|
||||||
bool* output_data) { \
|
|
||||||
BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data, \
|
|
||||||
input2_shape, input2_data, \
|
|
||||||
output_shape, output_data); \
|
|
||||||
} \
|
|
||||||
template <typename T> \
|
|
||||||
inline void Broadcast4DSlow##name##WithScaling( \
|
|
||||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
|
||||||
const T* input1_data, const RuntimeShape& input2_shape, \
|
|
||||||
const T* input2_data, const RuntimeShape& output_shape, \
|
|
||||||
bool* output_data) { \
|
|
||||||
BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
|
|
||||||
op_params, input1_shape, input1_data, input2_shape, input2_data, \
|
|
||||||
output_shape, output_data); \
|
|
||||||
}
|
|
||||||
TFLITE_COMPARISON_OP(Equal);
|
|
||||||
TFLITE_COMPARISON_OP(NotEqual);
|
|
||||||
TFLITE_COMPARISON_OP(Greater);
|
|
||||||
TFLITE_COMPARISON_OP(GreaterEqual);
|
|
||||||
TFLITE_COMPARISON_OP(Less);
|
|
||||||
TFLITE_COMPARISON_OP(LessEqual);
|
|
||||||
#undef TFLITE_COMPARISON_OP
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
|
|
||||||
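// Illustrative sketch (not part of the removed file): each comparison kernel
// above is the same elementwise loop parameterised by a predicate; the
// TFLITE_COMPARISON_OP macro only stamps out named wrappers around it. A
// standalone equivalent with invented names:
template <typename T, bool (*F)(T, T)>
void CompareElementwiseSketch(int flat_size, const T* in1, const T* in2,
                              bool* out) {
  for (int i = 0; i < flat_size; ++i) {
    out[i] = F(in1[i], in2[i]);
  }
}

template <typename T>
bool GreaterSketch(T a, T b) { return a > b; }
// Usage: CompareElementwiseSketch<float, GreaterSketch<float>>(n, a, b, mask);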
@@ -1,141 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
template <typename Scalar>
|
|
||||||
inline void Concatenation(const ConcatenationParams& params,
|
|
||||||
const RuntimeShape* const* input_shapes,
|
|
||||||
const Scalar* const* input_data,
|
|
||||||
const RuntimeShape& output_shape,
|
|
||||||
Scalar* output_data) {
|
|
||||||
int axis = params.axis;
|
|
||||||
int inputs_count = params.inputs_count;
|
|
||||||
const int concat_dimensions = output_shape.DimensionsCount();
|
|
||||||
TFLITE_DCHECK_LT(axis, concat_dimensions);
|
|
||||||
|
|
||||||
int64_t concat_size = 0;
|
|
||||||
for (int i = 0; i < inputs_count; i++) {
|
|
||||||
TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
|
|
||||||
for (int j = 0; j < concat_dimensions; j++) {
|
|
||||||
if (j != axis) {
|
|
||||||
MatchingDim(*input_shapes[i], j, output_shape, j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
concat_size += input_shapes[i]->Dims(axis);
|
|
||||||
}
|
|
||||||
TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
|
|
||||||
int64_t outer_size = 1;
|
|
||||||
for (int i = 0; i < axis; ++i) {
|
|
||||||
outer_size *= output_shape.Dims(i);
|
|
||||||
}
|
|
||||||
// For all input arrays,
|
|
||||||
// FlatSize() = outer_size * Dims(axis) * base_inner_size;
|
|
||||||
int64_t base_inner_size = 1;
|
|
||||||
for (int i = axis + 1; i < concat_dimensions; ++i) {
|
|
||||||
base_inner_size *= output_shape.Dims(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
Scalar* output_ptr = output_data;
|
|
||||||
for (int k = 0; k < outer_size; k++) {
|
|
||||||
for (int i = 0; i < inputs_count; ++i) {
|
|
||||||
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
|
|
||||||
const Scalar* input_ptr = input_data[i] + k * copy_size;
|
|
||||||
memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
|
|
||||||
output_ptr += copy_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(b/174275780): The quantized implementation of concatenation isn't fully
|
|
||||||
// quantized as it takes scale as a floating point value. This should be fixed
|
|
||||||
// when optimizing this routine further.
|
|
||||||
inline void ConcatenationWithScaling(const ConcatenationParams& params,
|
|
||||||
const RuntimeShape* const* input_shapes,
|
|
||||||
const uint8_t* const* input_data,
|
|
||||||
const RuntimeShape& output_shape,
|
|
||||||
uint8_t* output_data) {
|
|
||||||
int axis = params.axis;
|
|
||||||
const int32_t* input_zeropoint = params.input_zeropoint;
|
|
||||||
const float* input_scale = params.input_scale;
|
|
||||||
int inputs_count = params.inputs_count;
|
|
||||||
const int32_t output_zeropoint = params.output_zeropoint;
|
|
||||||
const float output_scale = params.output_scale;
|
|
||||||
|
|
||||||
const int concat_dimensions = output_shape.DimensionsCount();
|
|
||||||
TFLITE_DCHECK_LT(axis, concat_dimensions);
|
|
||||||
|
|
||||||
int64_t concat_size = 0;
|
|
||||||
for (int i = 0; i < inputs_count; i++) {
|
|
||||||
TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
|
|
||||||
for (int j = 0; j < concat_dimensions; j++) {
|
|
||||||
if (j != axis) {
|
|
||||||
MatchingDim(*input_shapes[i], j, output_shape, j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
concat_size += input_shapes[i]->Dims(axis);
|
|
||||||
}
|
|
||||||
TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
|
|
||||||
int64_t outer_size = 1;
|
|
||||||
for (int i = 0; i < axis; ++i) {
|
|
||||||
outer_size *= output_shape.Dims(i);
|
|
||||||
}
|
|
||||||
// For all input arrays,
|
|
||||||
// FlatSize() = outer_size * Dims(axis) * base_inner_size;
|
|
||||||
int64_t base_inner_size = 1;
|
|
||||||
for (int i = axis + 1; i < concat_dimensions; ++i) {
|
|
||||||
base_inner_size *= output_shape.Dims(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
const float inverse_output_scale = 1.f / output_scale;
|
|
||||||
uint8_t* output_ptr = output_data;
|
|
||||||
for (int k = 0; k < outer_size; k++) {
|
|
||||||
for (int i = 0; i < inputs_count; ++i) {
|
|
||||||
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
|
|
||||||
const uint8_t* input_ptr = input_data[i] + k * copy_size;
|
|
||||||
if (input_zeropoint[i] == output_zeropoint &&
|
|
||||||
input_scale[i] == output_scale) {
|
|
||||||
memcpy(output_ptr, input_ptr, copy_size);
|
|
||||||
} else {
|
|
||||||
const float scale = input_scale[i] * inverse_output_scale;
|
|
||||||
const float bias = -input_zeropoint[i] * scale;
|
|
||||||
for (int j = 0; j < copy_size; ++j) {
|
|
||||||
const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
|
|
||||||
input_ptr[j] * scale + bias)) +
|
|
||||||
output_zeropoint;
|
|
||||||
output_ptr[j] = static_cast<uint8_t>(
|
|
||||||
std::max<int32_t>(std::min<int32_t>(255, value), 0));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output_ptr += copy_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
|
|
||||||
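// Illustrative sketch (not from the removed file): for each outer index the
// Concatenation kernel above appends one contiguous slice per input, so the
// whole op is a sequence of memcpy calls. The copy pattern in isolation, with
// invented names:
#include <cstring>
#include <vector>

void ConcatCopySketch(const std::vector<const float*>& inputs,
                      const std::vector<int>& copy_sizes,  // Dims(axis) * inner
                      int outer_size, float* output) {
  float* out_ptr = output;
  for (int k = 0; k < outer_size; ++k) {
    for (size_t i = 0; i < inputs.size(); ++i) {
      const float* in_ptr = inputs[i] + k * copy_sizes[i];
      std::memcpy(out_ptr, in_ptr, copy_sizes[i] * sizeof(float));
      out_ptr += copy_sizes[i];
    }
  }
}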
@@ -1,287 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_

#include <algorithm>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                 const float* input_data, const RuntimeShape& filter_shape,
                 const float* filter_data, const RuntimeShape& bias_shape,
                 const float* bias_data, const RuntimeShape& output_shape,
                 float* output_data, const RuntimeShape& im2col_shape,
                 float* im2col_data) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = input_shape.Dims(3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int filter_input_depth = filter_shape.Dims(3);
  const int groups = input_depth / filter_input_depth;
  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
  const int filters_per_group = output_depth / groups;
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      const int in_y_origin = (out_y * stride_height) - pad_height;
      for (int out_x = 0; out_x < output_width; ++out_x) {
        const int in_x_origin = (out_x * stride_width) - pad_width;
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          auto group = out_channel / filters_per_group;
          float total = 0.f;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            const int in_y = in_y_origin + dilation_height_factor * filter_y;
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              const int in_x = in_x_origin + dilation_width_factor * filter_x;

              // Zero padding by omitting the areas outside the image.
              const bool is_point_inside_image =
                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                  (in_y < input_height);

              if (!is_point_inside_image) {
                continue;
              }
              for (int in_channel = 0; in_channel < filter_input_depth;
                   ++in_channel) {
                float input_value =
                    input_data[Offset(input_shape, batch, in_y, in_x,
                                      in_channel + group * filter_input_depth)];
                float filter_value = filter_data[Offset(
                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
                total += (input_value * filter_value);
              }
            }
          }
          float bias_value = 0.0f;
          if (bias_data) {
            bias_value = bias_data[out_channel];
          }
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              ActivationFunctionWithMinMax(total + bias_value,
                                           output_activation_min,
                                           output_activation_max);
        }
      }
    }
  }
}

inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                 const uint8_t* input_data, const RuntimeShape& filter_shape,
                 const uint8_t* filter_data, const RuntimeShape& bias_shape,
                 const int32_t* bias_data, const RuntimeShape& output_shape,
                 uint8_t* output_data, const RuntimeShape& im2col_shape,
                 uint8_t* im2col_data, void* cpu_backend_context) {
  (void)cpu_backend_context;  // only used in optimized code.
  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int32_t input_offset = params.input_offset;
  const int32_t filter_offset = params.weights_offset;
  const int32_t output_offset = params.output_offset;
  const int32_t output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = input_shape.Dims(3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int filter_input_depth = filter_shape.Dims(3);
  const int groups = input_depth / filter_input_depth;
  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
  const int filters_per_group = output_depth / groups;
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      const int in_y_origin = (out_y * stride_height) - pad_height;
      for (int out_x = 0; out_x < output_width; ++out_x) {
        const int in_x_origin = (out_x * stride_width) - pad_width;
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          auto group = out_channel / filters_per_group;
          int32_t acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            const int in_y = in_y_origin + dilation_height_factor * filter_y;
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              const int in_x = in_x_origin + dilation_width_factor * filter_x;

              // Zero padding by omitting the areas outside the image.
              const bool is_point_inside_image =
                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                  (in_y < input_height);

              if (!is_point_inside_image) {
                continue;
              }

              for (int in_channel = 0; in_channel < filter_input_depth;
                   ++in_channel) {
                int32_t input_val =
                    input_data[Offset(input_shape, batch, in_y, in_x,
                                      in_channel + group * filter_input_depth)];
                int32_t filter_val = filter_data[Offset(
                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
                acc +=
                    (filter_val + filter_offset) * (input_val + input_offset);
              }
            }
          }
          if (bias_data) {
            acc += bias_data[out_channel];
          }
          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
                                              output_shift);
          acc += output_offset;
          acc = std::max(acc, output_activation_min);
          acc = std::min(acc, output_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              static_cast<uint8_t>(acc);
        }
      }
    }
  }
}

inline void HybridConvPerChannel(
    const ConvParams& params, float* scaling_factors_ptr,
    const RuntimeShape& input_shape, const int8_t* input_data,
    const RuntimeShape& filter_shape, const int8_t* filter_data,
    const RuntimeShape& bias_shape, const float* bias_data,
    const RuntimeShape& output_shape, float* output_data,
    const RuntimeShape& im2col_shape, int8_t* im2col_data,
    const float* per_channel_scale, int32_t* input_offset) {
  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = input_shape.Dims(3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int filter_input_depth = filter_shape.Dims(3);
  const int groups = input_depth / filter_input_depth;
  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
  const int filters_per_group = output_depth / groups;
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          auto group = out_channel / filters_per_group;
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          int32_t acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int in_channel = 0; in_channel < filter_input_depth;
                   ++in_channel) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  int32_t input_val = input_data[Offset(
                      input_shape, batch, in_y, in_x,
                      in_channel + group * filter_input_depth)];
                  int32_t filter_val =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  acc += filter_val * (input_val - input_offset[batch]);
                }
              }
            }
          }
          float acc_float =
              acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
          if (bias_data) {
            acc_float += bias_data[out_channel];
          }
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              ActivationFunctionWithMinMax(acc_float, output_activation_min,
                                           output_activation_max);
        }
      }
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
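The float Conv above walks every output position and accumulates input * filter products over the filter window and the input channels in NHWC order, then adds the bias and applies the activation clamp. The sketch below is a deliberately simplified, self-contained version of that accumulation (single batch, single channel, stride 1, no dilation, no padding, hand-computed offsets instead of tflite::Offset); it is illustrative only, not the deleted kernel.

#include <cstdio>

int main() {
  const int input_h = 3, input_w = 3;
  const int filter_h = 2, filter_w = 2;
  const float input[3][3] = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};
  const float filter[2][2] = {{1, 0}, {0, 1}};
  const float bias = 0.5f;

  const int output_h = input_h - filter_h + 1;  // "valid" padding -> 2
  const int output_w = input_w - filter_w + 1;  // 2
  for (int out_y = 0; out_y < output_h; ++out_y) {
    for (int out_x = 0; out_x < output_w; ++out_x) {
      float total = 0.f;
      // Accumulate over the filter window, exactly as the inner loops above.
      for (int fy = 0; fy < filter_h; ++fy) {
        for (int fx = 0; fx < filter_w; ++fx) {
          total += input[out_y + fy][out_x + fx] * filter[fy][fx];
        }
      }
      std::printf("out[%d][%d] = %.1f\n", out_y, out_x, total + bias);
    }
  }
  return 0;
}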
@@ -1,175 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_

#include <algorithm>
#include <cstdint>
#include <limits>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"

namespace tflite {
namespace reference_ops {

template <typename T>
inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
                   bool exclusive, bool reverse, T* output_data) {
  const int32_t rank = shape.DimensionsCount();
  TFLITE_DCHECK_GE(rank, 1);
  TFLITE_DCHECK_GE(axis, 0);
  TFLITE_DCHECK_LT(axis, rank);

  size_t inner = 1;
  size_t outer = 1;
  size_t depth = 1;
  for (int32_t i = 0; i < rank; i++) {
    if (i < axis)
      inner *= shape.Dims(i);
    else if (i > axis)
      outer *= shape.Dims(i);
    else
      depth = shape.Dims(i);
  }

  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
    size_t outer_index_adj;
    if (reverse)
      outer_index_adj = (outer - 1) - outer_index;
    else
      outer_index_adj = outer_index;
    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
      T accumulator = 0;
      size_t inner_index_adj;
      if (reverse)
        inner_index_adj = (inner - 1) - inner_index;
      else
        inner_index_adj = inner_index;
      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
        size_t depth_index_adj;
        if (reverse)
          depth_index_adj = (depth - 1) - depth_index;
        else
          depth_index_adj = depth_index;

        size_t index = outer_index_adj;
        index += inner_index_adj * depth * outer;
        index += depth_index_adj * outer;

        if (exclusive) {
          output_data[index] = accumulator;
          accumulator += input_data[index];
        } else {
          accumulator += input_data[index];
          output_data[index] = accumulator;
        }
      }
    }
  }
}

//
// Quantized INT8 CUMSUM
//
inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
                   const RuntimeShape& shape, int32_t axis, bool exclusive,
                   bool reverse, int8_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  // Input offset is negative input zero point. Activation tensors are
  // asymmetric quantized so they span the full int8 range.
  // All inputs should have same zero-point and scale, this is checked during
  // Prepare stage.
  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());

  const int32_t rank = shape.DimensionsCount();
  TFLITE_DCHECK_GE(rank, 1);
  TFLITE_DCHECK_GE(axis, 0);
  TFLITE_DCHECK_LT(axis, rank);

  size_t inner = 1;
  size_t outer = 1;
  size_t depth = 1;
  for (int32_t i = 0; i < rank; i++) {
    if (i < axis)
      inner *= shape.Dims(i);
    else if (i > axis)
      outer *= shape.Dims(i);
    else
      depth = shape.Dims(i);
  }

  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
    size_t outer_index_adj;
    if (reverse)
      outer_index_adj = (outer - 1) - outer_index;
    else
      outer_index_adj = outer_index;
    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
      int32_t accumulator = params.input1_offset;  // accumulator = 0
      accumulator *= (1 << params.left_shift);
      accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
          accumulator, params.input1_multiplier, params.input1_shift);

      size_t inner_index_adj;
      if (reverse)
        inner_index_adj = (inner - 1) - inner_index;
      else
        inner_index_adj = inner_index;

      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
        size_t depth_index_adj;
        if (reverse)
          depth_index_adj = (depth - 1) - depth_index;
        else
          depth_index_adj = depth_index;

        size_t index = outer_index_adj;
        index += inner_index_adj * depth * outer;
        index += depth_index_adj * outer;

        const int32_t y = params.input1_offset + input_data[index];
        const int32_t shifted_y = y * (1 << params.left_shift);
        const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_y, params.input1_multiplier, params.input1_shift);

        int32_t scaled_output;
        if (exclusive) {
          scaled_output = accumulator;
          accumulator += scaled_y;
        } else {
          accumulator += scaled_y;
          scaled_output = accumulator;
        }

        const int32_t raw_output =
            MultiplyByQuantizedMultiplierSmallerThanOneExp(
                scaled_output, params.output_multiplier, params.output_shift) +
            params.output_offset;
        const int32_t clamped_output =
            std::min(params.quantized_activation_max,
                     std::max(params.quantized_activation_min, raw_output));
        output_data[index] = static_cast<int8_t>(clamped_output);
      }
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
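Both CumSum variants above differ only in how the running accumulator is written out: the exclusive form stores the running sum before adding the current element, the inclusive form stores it after. A minimal standalone 1-D sketch of that distinction (plain C++, not part of the deleted header):

#include <cstdio>

int main() {
  const int data[5] = {1, 2, 3, 4, 5};
  int inclusive[5], exclusive[5];

  int acc_in = 0, acc_ex = 0;
  for (int i = 0; i < 5; ++i) {
    acc_in += data[i];      // inclusive: add first, then store
    inclusive[i] = acc_in;
    exclusive[i] = acc_ex;  // exclusive: store first, then add
    acc_ex += data[i];
  }

  for (int i = 0; i < 5; ++i) {
    std::printf("%d: inclusive=%d exclusive=%d\n", i, inclusive[i],
                exclusive[i]);
  }
  return 0;  // inclusive: 1 3 6 10 15, exclusive: 0 1 3 6 10
}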
@@ -1,79 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_

#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

template <typename T>
inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
                         const RuntimeShape& unextended_input_shape,
                         const T* input_data,
                         const RuntimeShape& unextended_output_shape,
                         T* output_data) {
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  const int input_depth = input_shape.Dims(3);
  const int input_width = input_shape.Dims(2);
  const int input_height = input_shape.Dims(1);
  const int input_batch = input_shape.Dims(0);

  const int output_depth = output_shape.Dims(3);
  const int output_width = output_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_batch = output_shape.Dims(0);

  const int32_t block_size = op_params.block_size;

  TFLITE_DCHECK_EQ(input_width * block_size, output_width);
  TFLITE_DCHECK_EQ(input_height * block_size, output_height);
  TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size);
  TFLITE_DCHECK_EQ(input_batch, output_batch);

  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_h = 0; out_h < output_height; ++out_h) {
      for (int out_w = 0; out_w < output_width; ++out_w) {
        for (int out_d = 0; out_d < output_depth; ++out_d) {
          const int in_d =
              out_d + ((out_h % block_size) * block_size + out_w % block_size) *
                          output_depth;

          const int in_w = out_w / block_size;
          const int in_h = out_h / block_size;
          const int in_b = out_b;

          const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
          const int output_index =
              Offset(output_shape, out_b, out_h, out_w, out_d);

          output_data[output_index] = input_data[input_index];
        }
      }
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
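DepthToSpace rearranges each group of block_size * block_size input channels into a block_size x block_size spatial patch; the in_d expression above selects which input channel feeds each output pixel. A standalone sketch of that mapping for block_size = 2 on a 1x1x1x4 input (illustrative only, hand-indexed rather than using tflite::Offset):

#include <cstdio>

int main() {
  const int block_size = 2;
  const int output_depth = 1;
  const float input[4] = {10.f, 20.f, 30.f, 40.f};  // NHWC, shape 1x1x1x4
  float output[2][2][1];                            // shape 1x2x2x1

  for (int out_h = 0; out_h < 2; ++out_h) {
    for (int out_w = 0; out_w < 2; ++out_w) {
      for (int out_d = 0; out_d < output_depth; ++out_d) {
        // Same channel-selection formula as the reference loop above.
        const int in_d =
            out_d + ((out_h % block_size) * block_size + out_w % block_size) *
                        output_depth;
        output[out_h][out_w][out_d] = input[in_d];  // in_h = in_w = 0 here
      }
    }
  }

  std::printf("%.0f %.0f\n%.0f %.0f\n", output[0][0][0], output[0][1][0],
              output[1][0][0], output[1][1][0]);  // 10 20 / 30 40
  return 0;
}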
@@ -1,100 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

inline void DepthwiseConv(
    const DepthwiseParams& params, const RuntimeShape& input_shape,
    const float* input_data, const RuntimeShape& filter_shape,
    const float* filter_data, const RuntimeShape& bias_shape,
    const float* bias_data, const RuntimeShape& output_shape,
    float* output_data) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int depth_multiplier = params.depth_multiplier;
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int input_depth = input_shape.Dims(3);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

  for (int b = 0; b < batches; ++b) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int ic = 0; ic < input_depth; ++ic) {
          for (int m = 0; m < depth_multiplier; m++) {
            const int oc = m + ic * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - pad_width;
            const int in_y_origin = (out_y * stride_height) - pad_height;
            float total = 0.f;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  float input_value =
                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
                  float filter_value = filter_data[Offset(
                      filter_shape, 0, filter_y, filter_x, oc)];
                  total += (input_value * filter_value);
                }
              }
            }
            float bias_value = 0.0f;
            if (bias_data) {
              bias_value = bias_data[oc];
            }
            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
                ActivationFunctionWithMinMax(total + bias_value,
                                             output_activation_min,
                                             output_activation_max);
          }
        }
      }
    }
  }
}

}  // end namespace reference_ops
}  // end namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
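Unlike regular Conv, the depthwise kernel above never sums across input channels: each input channel ic produces depth_multiplier consecutive output channels oc = m + ic * depth_multiplier, and output channel oc reads only input channel ic. A tiny standalone sketch of that channel mapping (illustrative only):

#include <cstdio>

int main() {
  const int input_depth = 3;
  const int depth_multiplier = 2;
  for (int ic = 0; ic < input_depth; ++ic) {
    for (int m = 0; m < depth_multiplier; ++m) {
      const int oc = m + ic * depth_multiplier;  // same formula as above
      std::printf("input channel %d -> output channel %d\n", ic, oc);
    }
  }
  return 0;  // 0->0, 0->1, 1->2, 1->3, 2->4, 2->5
}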
@@ -1,319 +0,0 @@
|
|||||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "fixedpoint/fixedpoint.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
// Used in tests and template parameters to control which version of depthwise
|
|
||||||
// convolution is called. Primarily for reference code, and specializations
|
|
||||||
// forced in tests.
|
|
||||||
enum class DepthwiseConvImplementation {
|
|
||||||
// Run all tests against kUseStandardEntry even if also testing another
|
|
||||||
// kernel, since we need to be sure that the main DepthwiseConv() function in
|
|
||||||
// optimized_ops.h dispatches to a correctly-executing kernel.
|
|
||||||
kNone = 0, // The "default" option: use the normal
|
|
||||||
// DepthwiseConv kernel (entry) function.
|
|
||||||
kUseGenericKernel, // Forced use of generic kernel.
|
|
||||||
kUseNeon3x3, // 3x3 kernel that uses NEON when available.
|
|
||||||
kUseNeon3x3DotProduct, // 3x3 kernel that uses dot-product enabled NEON
|
|
||||||
// when available.
|
|
||||||
kUseCModel3x3DotProduct, // 3x3 kernel, reference C model that is intended
|
|
||||||
// to match overall design NEON code.
|
|
||||||
kUseUnwound3x3DotProduct, // 3x3 kernel, reference C model with unwound loops
|
|
||||||
// and some arrays.
|
|
||||||
kUseIntrinsics3x3DotProduct, // 3x3 kernel using NEON intrinsics.
|
|
||||||
};
|
|
||||||
|
|
||||||
// Category of depthwise convolution output rounding.
|
|
||||||
enum class DepthwiseConvOutputRounding {
|
|
||||||
kNone = 0, // Invalid: specific method must be specified.
|
|
||||||
kAwayFromZero, // Original method: exact halves rounded away from zero.
|
|
||||||
kUpward, // Halves towards +infinity: adds 0.5 before truncate.
|
|
||||||
// This is where a future kNearestEven would be placed.
|
|
||||||
};
|
|
||||||
|
|
||||||
// Category of depthwise convolution depth multiplication.
|
|
||||||
enum class DepthwiseConvDepthMultiplication {
|
|
||||||
kNoMultiplication = 0, // Depth multiplier = 1.
|
|
||||||
kUnitInputDepth, // Input depth = 1, output depth = depth multiplier.
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace reference_ops {
|
|
||||||
namespace depthwise_conv {
|
|
||||||
|
|
||||||
template <DepthwiseConvOutputRounding output_rounding>
|
|
||||||
inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
|
|
||||||
int shift) {
|
|
||||||
TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
|
|
||||||
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Single-rounding MultiplyByQuantizedMultiplier
|
|
||||||
#if TFLITE_SINGLE_ROUNDING
|
|
||||||
template <>
|
|
||||||
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
|
|
||||||
int32_t x, int32_t quantized_multiplier, int shift) {
|
|
||||||
using gemmlowp::RoundingDivideByPOT;
|
|
||||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
|
||||||
int left_shift = shift > 0 ? shift : 0;
|
|
||||||
int right_shift = shift > 0 ? 0 : -shift;
|
|
||||||
return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
|
|
||||||
x * (1 << left_shift), quantized_multiplier),
|
|
||||||
right_shift);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
|
|
||||||
int32_t x, int32_t quantized_multiplier, int shift) {
|
|
||||||
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
|
|
||||||
}
|
|
||||||
// Double-rounding MultiplyByQuantizedMultiplier
|
|
||||||
#else
|
|
||||||
template <>
|
|
||||||
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
|
|
||||||
int32_t x, int32_t quantized_multiplier, int shift) {
|
|
||||||
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
|
|
||||||
int32_t x, int32_t quantized_multiplier, int shift) {
|
|
||||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
|
||||||
const int left_shift = shift > 0 ? shift : 0;
|
|
||||||
const int right_shift = shift > 0 ? 0 : -shift;
|
|
||||||
const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
|
|
||||||
return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
|
|
||||||
quantized_multiplier) +
|
|
||||||
rounding_offset) >>
|
|
||||||
right_shift;
|
|
||||||
}
|
|
||||||
#endif // TFLITE_SINGLE_ROUNDING
|
|
||||||
|
|
||||||
template <DepthwiseConvOutputRounding output_rounding>
|
|
||||||
struct DepthwiseConvBasicKernel {
|
|
||||||
static inline void Run(
|
|
||||||
const DepthwiseParams& params, const RuntimeShape& input_shape,
|
|
||||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
uint8_t* output_data) {
|
|
||||||
const int stride_width = params.stride_width;
|
|
||||||
const int stride_height = params.stride_height;
|
|
||||||
const int dilation_width_factor = params.dilation_width_factor;
|
|
||||||
const int dilation_height_factor = params.dilation_height_factor;
|
|
||||||
const int pad_width = params.padding_values.width;
|
|
||||||
const int pad_height = params.padding_values.height;
|
|
||||||
const int depth_multiplier = params.depth_multiplier;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
const int32_t input_offset = params.input_offset;
|
|
||||||
const int32_t filter_offset = params.weights_offset;
|
|
||||||
const int32_t output_offset = params.output_offset;
|
|
||||||
const int32_t output_multiplier = params.output_multiplier;
|
|
||||||
const int output_shift = params.output_shift;
|
|
||||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
|
||||||
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
|
|
||||||
const int input_height = input_shape.Dims(1);
|
|
||||||
const int input_width = input_shape.Dims(2);
|
|
||||||
const int input_depth = input_shape.Dims(3);
|
|
||||||
const int filter_height = filter_shape.Dims(1);
|
|
||||||
const int filter_width = filter_shape.Dims(2);
|
|
||||||
const int output_height = output_shape.Dims(1);
|
|
||||||
const int output_width = output_shape.Dims(2);
|
|
||||||
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
|
|
||||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
|
||||||
|
|
||||||
for (int b = 0; b < batches; ++b) {
|
|
||||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
|
||||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
|
||||||
for (int ic = 0; ic < input_depth; ++ic) {
|
|
||||||
for (int m = 0; m < depth_multiplier; m++) {
|
|
||||||
const int oc = m + ic * depth_multiplier;
|
|
||||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
|
||||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
|
||||||
int32_t acc = 0;
|
|
||||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
|
||||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
|
||||||
const int in_x =
|
|
||||||
in_x_origin + dilation_width_factor * filter_x;
|
|
||||||
const int in_y =
|
|
||||||
in_y_origin + dilation_height_factor * filter_y;
|
|
||||||
// If the location is outside the bounds of the input image,
|
|
||||||
// use zero as a default value.
|
|
||||||
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
|
||||||
(in_y < input_height)) {
|
|
||||||
int32_t input_val =
|
|
||||||
input_data[Offset(input_shape, b, in_y, in_x, ic)];
|
|
||||||
int32_t filter_val = filter_data[Offset(
|
|
||||||
filter_shape, 0, filter_y, filter_x, oc)];
|
|
||||||
acc += (filter_val + filter_offset) *
|
|
||||||
(input_val + input_offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (bias_data) {
|
|
||||||
acc += bias_data[oc];
|
|
||||||
}
|
|
||||||
acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
|
|
||||||
output_shift);
|
|
||||||
acc += output_offset;
|
|
||||||
acc = std::max(acc, output_activation_min);
|
|
||||||
acc = std::min(acc, output_activation_max);
|
|
||||||
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
|
|
||||||
static_cast<uint8_t>(acc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(b/148596273): Reconcile reference versions, perhaps with common
|
|
||||||
// MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
|
|
||||||
static inline void RunPerChannel(
|
|
||||||
const DepthwiseParams& params, const RuntimeShape& input_shape,
|
|
||||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int8_t* output_data) {
|
|
||||||
// Get parameters.
|
|
||||||
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
|
|
||||||
const int stride_width = params.stride_width;
|
|
||||||
const int stride_height = params.stride_height;
|
|
||||||
const int dilation_width_factor = params.dilation_width_factor;
|
|
||||||
const int dilation_height_factor = params.dilation_height_factor;
|
|
||||||
const int pad_width = params.padding_values.width;
|
|
||||||
const int pad_height = params.padding_values.height;
|
|
||||||
const int depth_multiplier = params.depth_multiplier;
|
|
||||||
const int32_t input_offset = params.input_offset;
|
|
||||||
const int32_t output_offset = params.output_offset;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
const int32_t* output_multiplier = params.output_multiplier_per_channel;
|
|
||||||
const int32_t* output_shift = params.output_shift_per_channel;
|
|
||||||
|
|
||||||
// Check dimensions of the tensors.
|
|
||||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
|
||||||
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
|
|
||||||
const int input_height = input_shape.Dims(1);
|
|
||||||
const int input_width = input_shape.Dims(2);
|
|
||||||
const int input_depth = input_shape.Dims(3);
|
|
||||||
const int filter_height = filter_shape.Dims(1);
|
|
||||||
const int filter_width = filter_shape.Dims(2);
|
|
||||||
const int output_height = output_shape.Dims(1);
|
|
||||||
const int output_width = output_shape.Dims(2);
|
|
||||||
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
|
|
||||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
|
||||||
|
|
||||||
for (int batch = 0; batch < batches; ++batch) {
|
|
||||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
|
||||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
|
||||||
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
|
|
||||||
for (int m = 0; m < depth_multiplier; ++m) {
|
|
||||||
const int output_channel = m + in_channel * depth_multiplier;
|
|
||||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
|
||||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
|
||||||
int32_t acc = 0;
|
|
||||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
|
||||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
|
||||||
const int in_x =
|
|
||||||
in_x_origin + dilation_width_factor * filter_x;
|
|
||||||
const int in_y =
|
|
||||||
in_y_origin + dilation_height_factor * filter_y;
|
|
||||||
// Zero padding by omitting the areas outside the image.
|
|
||||||
const bool is_point_inside_image =
|
|
||||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
|
||||||
(in_y < input_height);
|
|
||||||
if (is_point_inside_image) {
|
|
||||||
int32_t input_val = input_data[Offset(
|
|
||||||
input_shape, batch, in_y, in_x, in_channel)];
|
|
||||||
int32_t filter_val = filter_data[Offset(
|
|
||||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
|
||||||
// Accumulate with 32 bits accumulator.
|
|
||||||
// In the nudging process during model quantization, we
|
|
||||||
// force real value of 0.0 be represented by a quantized
|
|
||||||
// value. This guarantees that the input_offset is a int8_t,
|
|
||||||
// even though it is represented using int32_t. int32_t +=
|
|
||||||
// int8_t
|
|
||||||
// * (int8_t - int8_t) so the highest value we can get from
|
|
||||||
// each accumulation is [-127, 127] * ([-128, 127] -
|
|
||||||
// [-128, 127]), which is [-32512, 32512]. log2(32512)
|
|
||||||
// = 14.98, which means we can accumulate at least 2^16
|
|
||||||
// multiplications without overflow. The accumulator is
|
|
||||||
// applied to a filter so the accumulation logic will hold
|
|
||||||
// as long as the filter size (filter_y * filter_x *
|
|
||||||
// in_channel) does not exceed 2^16, which is the case in
|
|
||||||
// all the models we have seen so far.
|
|
||||||
acc += filter_val * (input_val + input_offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (bias_data) {
|
|
||||||
acc += bias_data[output_channel];
|
|
||||||
}
|
|
||||||
acc = DepthwiseConvRound<output_rounding>(
|
|
||||||
acc, output_multiplier[output_channel],
|
|
||||||
output_shift[output_channel]);
|
|
||||||
acc += output_offset;
|
|
||||||
acc = std::max(acc, output_activation_min);
|
|
||||||
acc = std::min(acc, output_activation_max);
|
|
||||||
output_data[Offset(output_shape, batch, out_y, out_x,
|
|
||||||
output_channel)] = static_cast<int8_t>(acc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace depthwise_conv
|
|
||||||
|
|
||||||
inline void DepthwiseConv(
|
|
||||||
const DepthwiseParams& params, const RuntimeShape& input_shape,
|
|
||||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
uint8_t* output_data) {
|
|
||||||
return depthwise_conv::DepthwiseConvBasicKernel<
|
|
||||||
DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
|
|
||||||
input_data, filter_shape,
|
|
||||||
filter_data, bias_shape,
|
|
||||||
bias_data, output_shape,
|
|
||||||
output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // end namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
|
|
||||||
@@ -1,78 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_

#include <limits.h>

#include <vector>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

// Dequantizes into a float without rounding.
template <typename InputT, typename OutputT>
inline void Dequantize(const tflite::DequantizationParams& op_params,
                       const RuntimeShape& input_shape,
                       const InputT* input_data,
                       const RuntimeShape& output_shape, OutputT* output_data) {
  int32_t zero_point = op_params.zero_point;
  const double scale = op_params.scale;
  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; i++) {
    const int32_t val = input_data[i];
    const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
    output_data[i] = result;
  }
}

// Dequantizes per-channel quantized tensor to float.
template <typename T>
inline void PerChannelDequantize(
    const tflite::PerChannelDequantizationParams& op_params,
    const RuntimeShape& input_shape, const T* input_data,
    const RuntimeShape& output_shape, float* output_data) {
  // Ensure flat size is same.
  MatchingFlatSize(input_shape, output_shape);

  const int32_t* zero_point = op_params.zero_point;
  const float* scale = op_params.scale;
  const int32_t quantized_dimension = op_params.quantized_dimension;
  const int32_t num_dims = input_shape.DimensionsCount();
  const int32_t* dims_data = input_shape.DimsData();
  std::vector<int> current_dim(num_dims, 0);

  do {
    size_t offset =
        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
                            current_dim.data(), 0, nullptr);
    const int channel = current_dim[quantized_dimension];
    const int32_t val = input_data[offset];
    const float result =
        static_cast<float>(scale[channel] * (val - zero_point[channel]));
    output_data[offset] = result;
  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
                     current_dim.data()));
}

}  // namespace reference_ops

}  // namespace tflite
#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
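Dequantize above applies the affine mapping real_value = scale * (quantized_value - zero_point) element by element (per-tensor in Dequantize, per-channel in PerChannelDequantize). A standalone sketch of that formula on a few example int8 values (the scale and zero point here are made-up illustration values, not from any real model):

#include <cstdint>
#include <cstdio>

int main() {
  const double scale = 0.5;
  const int32_t zero_point = -128;
  const int8_t quantized[4] = {-128, -64, 0, 127};

  for (int i = 0; i < 4; ++i) {
    const int32_t val = quantized[i];
    // Same arithmetic as the reference Dequantize loop above.
    const float result = static_cast<float>(scale * (val - zero_point));
    std::printf("q=%4d -> %.1f\n", val, result);
  }
  return 0;  // 0.0, 32.0, 64.0, 127.5
}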
@@ -1,247 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline void DivCheckArithmeticParams(const ArithmeticParams& params) {
|
|
||||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
|
||||||
params.quantized_activation_max);
|
|
||||||
// Input offset is negative input zero point. Activation tensors are
|
|
||||||
// asymmetric quantized so they span the full int8 range.
|
|
||||||
constexpr int32_t max_value =
|
|
||||||
static_cast<int32_t>(std::numeric_limits<T>::max());
|
|
||||||
TFLITE_DCHECK_GE(params.input1_offset, -max_value);
|
|
||||||
TFLITE_DCHECK_LE(params.input1_offset, max_value);
|
|
||||||
TFLITE_DCHECK_GE(params.input2_offset, -max_value);
|
|
||||||
TFLITE_DCHECK_LE(params.input2_offset, max_value);
|
|
||||||
TFLITE_DCHECK_GE(params.output_offset, -max_value);
|
|
||||||
TFLITE_DCHECK_LE(params.output_offset, max_value);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Element-wise div that can often be used for inner loop of broadcast Div as
|
|
||||||
// well as the non-broadcast Div.
|
|
||||||
template <typename T>
|
|
||||||
inline void DivElementwise(int size, const ArithmeticParams& params,
|
|
||||||
const T* input1_data, const T* input2_data,
|
|
||||||
T* output_data) {
|
|
||||||
DivCheckArithmeticParams<T>(params);
|
|
||||||
|
|
||||||
for (int i = 0; i < size; ++i) {
|
|
||||||
int32_t input1_val = params.input1_offset + input1_data[i];
|
|
||||||
int32_t input2_val = params.input2_offset + input2_data[i];
|
|
||||||
TFLITE_DCHECK_NE(input2_val, 0);
|
|
||||||
if (input2_val < 0) {
|
|
||||||
// Invert signs to avoid a negative input2_val as input2_inv needs to be
|
|
||||||
// positive to be used as multiplier of MultiplyByQuantizedMultiplier.
|
|
||||||
input1_val = -input1_val;
|
|
||||||
input2_val = -input2_val;
|
|
||||||
}
|
|
||||||
int recip_shift;
|
|
||||||
const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
|
|
||||||
const int headroom = CountLeadingSignBits(input1_val);
|
|
||||||
const int32_t unscaled_quotient =
|
|
||||||
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
|
|
||||||
headroom);
|
|
||||||
const int total_shift = params.output_shift - recip_shift - headroom;
|
|
||||||
const int32_t unclamped_result =
|
|
||||||
params.output_offset +
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
unscaled_quotient, params.output_multiplier, total_shift);
|
|
||||||
const int32_t clamped_output =
|
|
||||||
std::min(params.quantized_activation_max,
|
|
||||||
std::max(params.quantized_activation_min, unclamped_result));
|
|
||||||
output_data[i] = static_cast<T>(clamped_output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Div(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& input1_shape, const uint8_t* input1_data,
|
|
||||||
const RuntimeShape& input2_shape, const uint8_t* input2_data,
|
|
||||||
const RuntimeShape& output_shape, uint8_t* output_data) {
|
|
||||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
|
||||||
params.quantized_activation_max);
|
|
||||||
const int flat_size =
|
|
||||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
|
||||||
|
|
||||||
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Div(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& input1_shape, const int8_t* input1_data,
|
|
||||||
const RuntimeShape& input2_shape, const int8_t* input2_data,
|
|
||||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
|
||||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
|
||||||
params.quantized_activation_max);
|
|
||||||
const int flat_size =
|
|
||||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
|
||||||
|
|
||||||
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, int N = 5>
|
|
||||||
inline void BroadcastDivSlowQuantized(
|
|
||||||
const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape,
|
|
||||||
const T* input1_data, const RuntimeShape& unextended_input2_shape,
|
|
||||||
const T* input2_data, const RuntimeShape& unextended_output_shape,
|
|
||||||
T* output_data) {
|
|
||||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
|
|
||||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
|
|
||||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
|
|
||||||
|
|
||||||
NdArrayDesc<N> desc1;
|
|
||||||
NdArrayDesc<N> desc2;
|
|
||||||
NdArrayDesc<N> output_desc;
|
|
||||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
|
||||||
unextended_input2_shape, &desc1, &desc2);
|
|
||||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
|
|
||||||
&output_desc);
|
|
||||||
|
|
||||||
DivCheckArithmeticParams<T>(params);
|
|
||||||
|
|
||||||
auto div_func = [&](int indexes[N]) {
|
|
||||||
int32_t input1_val =
|
|
||||||
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
|
|
||||||
int32_t input2_val =
|
|
||||||
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
|
|
||||||
TFLITE_DCHECK_NE(input2_val, 0);
|
|
||||||
if (input2_val < 0) {
|
|
||||||
// Invert signs to avoid a negative input2_val as input2_inv needs to be
|
|
||||||
// positive to be used as multiplier of MultiplyByQuantizedMultiplier.
|
|
||||||
input1_val = -input1_val;
|
|
||||||
input2_val = -input2_val;
|
|
||||||
}
|
|
||||||
int recip_shift;
|
|
||||||
const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
|
|
||||||
const int headroom = CountLeadingSignBits(input1_val);
|
|
||||||
const int32_t unscaled_quotient =
|
|
||||||
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
|
|
||||||
headroom);
|
|
||||||
const int total_shift = params.output_shift - recip_shift - headroom;
|
|
||||||
const int32_t unclamped_result =
|
|
||||||
params.output_offset +
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
unscaled_quotient, params.output_multiplier, total_shift);
|
|
||||||
const int32_t clamped_output =
|
|
||||||
std::min(params.quantized_activation_max,
|
|
||||||
std::max(params.quantized_activation_min, unclamped_result));
|
|
||||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
|
||||||
static_cast<T>(clamped_output);
|
|
||||||
};
|
|
||||||
NDOpsHelper<N>(output_desc, div_func);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <int N = 5>
|
|
||||||
inline void BroadcastDivSlow(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& unextended_input1_shape,
|
|
||||||
const uint8_t* input1_data,
|
|
||||||
const RuntimeShape& unextended_input2_shape,
|
|
||||||
const uint8_t* input2_data,
|
|
||||||
const RuntimeShape& unextended_output_shape,
|
|
||||||
uint8_t* output_data) {
|
|
||||||
BroadcastDivSlowQuantized<uint8_t, N>(
|
|
||||||
params, unextended_input1_shape, input1_data, unextended_input2_shape,
|
|
||||||
input2_data, unextended_output_shape, output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <int N = 5>
|
|
||||||
inline void BroadcastDivSlow(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& unextended_input1_shape,
|
|
||||||
const int8_t* input1_data,
|
|
||||||
const RuntimeShape& unextended_input2_shape,
|
|
||||||
const int8_t* input2_data,
|
|
||||||
const RuntimeShape& unextended_output_shape,
|
|
||||||
int8_t* output_data) {
|
|
||||||
BroadcastDivSlowQuantized<int8_t, N>(
|
|
||||||
params, unextended_input1_shape, input1_data, unextended_input2_shape,
|
|
||||||
input2_data, unextended_output_shape, output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
|
|
||||||
// dimensionality if the runtime code does a single loop over one dimension
|
|
||||||
// that handles broadcasting as the base case. The code generator would then
|
|
||||||
// generate max(D1, D2) nested for loops.
|
|
||||||
template <typename T, int N = 5>
|
|
||||||
void BroadcastDivSlow(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& unextended_input1_shape,
|
|
||||||
const T* input1_data,
|
|
||||||
const RuntimeShape& unextended_input2_shape,
|
|
||||||
const T* input2_data,
|
|
||||||
const RuntimeShape& unextended_output_shape,
|
|
||||||
T* output_data) {
|
|
||||||
T output_activation_min;
|
|
||||||
T output_activation_max;
|
|
||||||
GetActivationParams(params, &output_activation_min, &output_activation_max);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
|
|
||||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
|
|
||||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
|
|
||||||
|
|
||||||
NdArrayDesc<N> desc1;
|
|
||||||
NdArrayDesc<N> desc2;
|
|
||||||
NdArrayDesc<N> output_desc;
|
|
||||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
|
||||||
unextended_input2_shape, &desc1, &desc2);
|
|
||||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
|
|
||||||
&output_desc);
|
|
||||||
|
|
||||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
|
||||||
// col, channel), with extents (batches, height, width, depth), with the
|
|
||||||
// trailing dimension changing most rapidly (channels has the smallest
|
|
||||||
// stride, typically 1 element).
|
|
||||||
//
|
|
||||||
// In generated C code, we store arrays with the dimensions reversed. The
|
|
||||||
// first dimension has smallest stride.
|
|
||||||
|
|
||||||
auto div_func = [&](int indexes[N]) {
|
|
||||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
|
||||||
ActivationFunctionWithMinMax(
|
|
||||||
input1_data[SubscriptToIndex(desc1, indexes)] /
|
|
||||||
input2_data[SubscriptToIndex(desc2, indexes)],
|
|
||||||
output_activation_min, output_activation_max);
|
|
||||||
};
|
|
||||||
NDOpsHelper<N>(output_desc, div_func);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline void Div(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& input1_shape, const T* input1_data,
|
|
||||||
const RuntimeShape& input2_shape, const T* input2_data,
|
|
||||||
const RuntimeShape& output_shape, T* output_data) {
|
|
||||||
T output_activation_min;
|
|
||||||
T output_activation_max;
|
|
||||||
GetActivationParams(params, &output_activation_min, &output_activation_max);
|
|
||||||
|
|
||||||
const int flat_size =
|
|
||||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
|
||||||
for (int i = 0; i < flat_size; ++i) {
|
|
||||||
output_data[i] = ActivationFunctionWithMinMax(
|
|
||||||
input1_data[i] / input2_data[i], output_activation_min,
|
|
||||||
output_activation_max);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
inline void Elu(const RuntimeShape& input_shape, const float* input_data,
|
|
||||||
const RuntimeShape& output_shape, float* output_data) {
|
|
||||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
|
||||||
for (int i = 0; i < flat_size; ++i) {
|
|
||||||
const float val = input_data[i];
|
|
||||||
output_data[i] = val < 0.0f ? TfLiteExpm1(val) : val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline void Exp(const T* input_data, const size_t num_elements,
|
|
||||||
T* output_data) {
|
|
||||||
ruy::profiler::ScopeLabel label("Exp");
|
|
||||||
for (size_t idx = 0; idx < num_elements; ++idx) {
|
|
||||||
output_data[idx] = std::exp(input_data[idx]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void Fill(const RuntimeShape& value_shape, const T* value_data,
|
|
||||||
const RuntimeShape& output_shape, T* output_data) {
|
|
||||||
TFLITE_DCHECK_EQ(value_shape.DimensionsCount(), 0);
|
|
||||||
const int flat_size = output_shape.FlatSize();
|
|
||||||
for (int i = 0; i < flat_size; ++i) {
|
|
||||||
output_data[i] = *value_data;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
inline void Floor(const RuntimeShape& input_shape, const float* input_data,
|
|
||||||
const RuntimeShape& output_shape, float* output_data) {
|
|
||||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
|
||||||
|
|
||||||
for (int i = 0; i < flat_size; i++) {
|
|
||||||
int offset = i;
|
|
||||||
output_data[offset] = std::floor(input_data[offset]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <functional>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
T FloorDiv(T input1, T input2) {
|
|
||||||
return std::floor(std::divides<double>()(static_cast<double>(input1),
|
|
||||||
static_cast<double>(input2)));
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
|
|
||||||
|
|
||||||
#include <cmath>
|
|
||||||
#include <functional>
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
T FloorMod(T input1, T input2) {
|
|
||||||
struct FloatMod {
|
|
||||||
float operator()(const float lhs, const float rhs) const {
|
|
||||||
return std::fmod(lhs, rhs);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
using ModFunc = typename std::conditional<std::is_integral<T>::value,
|
|
||||||
std::modulus<T>, FloatMod>::type;
|
|
||||||
ModFunc mod_func;
|
|
||||||
T trunc_mod = mod_func(input1, input2);
|
|
||||||
return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0))
|
|
||||||
? (trunc_mod + input2)
|
|
||||||
: trunc_mod;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
|
|
||||||
@@ -1,323 +0,0 @@
|
|||||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
inline void FullyConnected(
|
|
||||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
|
||||||
const float* input_data, const RuntimeShape& weights_shape,
|
|
||||||
const float* weights_data, const RuntimeShape& bias_shape,
|
|
||||||
const float* bias_data, const RuntimeShape& output_shape,
|
|
||||||
float* output_data) {
|
|
||||||
const float output_activation_min = params.float_activation_min;
|
|
||||||
const float output_activation_max = params.float_activation_max;
|
|
||||||
// TODO(b/62193649): This really should be:
|
|
||||||
// const int batches = ArraySize(output_dims, 1);
|
|
||||||
// but the current --variable_batch hack consists in overwriting the 3rd
|
|
||||||
// dimension with the runtime batch size, as we don't keep track for each
|
|
||||||
// array of which dimension is the batch dimension in it.
|
|
||||||
const int output_dims_count = output_shape.DimensionsCount();
|
|
||||||
const int weights_dims_count = weights_shape.DimensionsCount();
|
|
||||||
const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
|
|
||||||
const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
|
|
||||||
output_shape, output_dims_count - 1);
|
|
||||||
const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
|
|
||||||
for (int b = 0; b < batches; ++b) {
|
|
||||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
|
||||||
float total = 0.f;
|
|
||||||
for (int d = 0; d < accum_depth; ++d) {
|
|
||||||
total += input_data[b * accum_depth + d] *
|
|
||||||
weights_data[out_c * accum_depth + d];
|
|
||||||
}
|
|
||||||
float bias_value = 0.0f;
|
|
||||||
if (bias_data) {
|
|
||||||
bias_value = bias_data[out_c];
|
|
||||||
}
|
|
||||||
output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
|
|
||||||
total + bias_value, output_activation_min, output_activation_max);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void FullyConnected(
|
|
||||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
|
||||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
uint8_t* output_data) {
|
|
||||||
const int32_t input_offset = params.input_offset;
|
|
||||||
const int32_t filter_offset = params.weights_offset;
|
|
||||||
const int32_t output_offset = params.output_offset;
|
|
||||||
const int32_t output_multiplier = params.output_multiplier;
|
|
||||||
const int output_shift = params.output_shift;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
|
||||||
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
// TODO(b/62193649): This really should be:
|
|
||||||
// const int batches = ArraySize(output_dims, 1);
|
|
||||||
// but the current --variable_batch hack consists in overwriting the 3rd
|
|
||||||
// dimension with the runtime batch size, as we don't keep track for each
|
|
||||||
// array of which dimension is the batch dimension in it.
|
|
||||||
const int output_dim_count = output_shape.DimensionsCount();
|
|
||||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
|
||||||
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
|
|
||||||
const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
|
|
||||||
output_shape, output_dim_count - 1);
|
|
||||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
|
||||||
for (int b = 0; b < batches; ++b) {
|
|
||||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
|
||||||
int32_t acc = 0;
|
|
||||||
for (int d = 0; d < accum_depth; ++d) {
|
|
||||||
int32_t input_val = input_data[b * accum_depth + d];
|
|
||||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
|
||||||
acc += (filter_val + filter_offset) * (input_val + input_offset);
|
|
||||||
}
|
|
||||||
if (bias_data) {
|
|
||||||
acc += bias_data[out_c];
|
|
||||||
}
|
|
||||||
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
|
|
||||||
acc += output_offset;
|
|
||||||
acc = std::max(acc, output_activation_min);
|
|
||||||
acc = std::min(acc, output_activation_max);
|
|
||||||
output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void FullyConnected(
|
|
||||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
|
||||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int16_t* output_data) {
|
|
||||||
const int32_t input_offset = params.input_offset;
|
|
||||||
const int32_t filter_offset = params.weights_offset;
|
|
||||||
const int32_t output_offset = params.output_offset;
|
|
||||||
const int32_t output_multiplier = params.output_multiplier;
|
|
||||||
const int output_shift = params.output_shift;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
TFLITE_DCHECK_EQ(output_offset, 0);
|
|
||||||
// TODO(b/62193649): This really should be:
|
|
||||||
// const int batches = ArraySize(output_dims, 1);
|
|
||||||
// but the current --variable_batch hack consists in overwriting the 3rd
|
|
||||||
// dimension with the runtime batch size, as we don't keep track for each
|
|
||||||
// array of which dimension is the batch dimension in it.
|
|
||||||
const int output_dim_count = output_shape.DimensionsCount();
|
|
||||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
|
||||||
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
|
|
||||||
const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
|
|
||||||
output_shape, output_dim_count - 1);
|
|
||||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
|
||||||
for (int b = 0; b < batches; ++b) {
|
|
||||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
|
||||||
// Internal accumulation.
|
|
||||||
// Initialize accumulator with the bias-value.
|
|
||||||
int32_t accum = bias_data[out_c];
|
|
||||||
// Accumulation loop.
|
|
||||||
for (int d = 0; d < accum_depth; ++d) {
|
|
||||||
int16_t input_val = input_data[b * accum_depth + d] + input_offset;
|
|
||||||
int16_t filter_val =
|
|
||||||
filter_data[out_c * accum_depth + d] + filter_offset;
|
|
||||||
accum += filter_val * input_val;
|
|
||||||
}
|
|
||||||
// Down-scale the final int32_t accumulator to the scale used by our
|
|
||||||
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
|
|
||||||
// multiplier and shift here have been pre-computed offline
|
|
||||||
// (e.g. by toco).
|
|
||||||
accum =
|
|
||||||
MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
|
|
||||||
// Saturate, cast to int16_t, and store to output array.
|
|
||||||
accum = std::max(accum, output_activation_min - output_offset);
|
|
||||||
accum = std::min(accum, output_activation_max - output_offset);
|
|
||||||
accum += output_offset;
|
|
||||||
output_data[out_c + output_depth * b] = accum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void ShuffledFullyConnected(
|
|
||||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
|
||||||
const uint8_t* input_data, const RuntimeShape& weights_shape,
|
|
||||||
const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
|
|
||||||
const int32_t output_multiplier = params.output_multiplier;
|
|
||||||
const int output_shift = params.output_shift;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
|
|
||||||
TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
|
|
||||||
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
|
|
||||||
// TODO(b/62193649): This really should be:
|
|
||||||
// const int batches = ArraySize(output_dims, 1);
|
|
||||||
// but the current --variable_batch hack consists in overwriting the 3rd
|
|
||||||
// dimension with the runtime batch size, as we don't keep track for each
|
|
||||||
// array of which dimension is the batch dimension in it.
|
|
||||||
const int output_dim_count = output_shape.DimensionsCount();
|
|
||||||
const int weights_dim_count = weights_shape.DimensionsCount();
|
|
||||||
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
|
|
||||||
const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
|
|
||||||
output_shape, output_dim_count - 1);
|
|
||||||
const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
|
|
||||||
TFLITE_DCHECK((accum_depth % 16) == 0);
|
|
||||||
TFLITE_DCHECK((output_depth % 4) == 0);
|
|
||||||
|
|
||||||
// Shuffling and xoring of input activations into the workspace buffer
|
|
||||||
uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
|
|
||||||
if (batches == 1) {
|
|
||||||
for (int i = 0; i < accum_depth; i++) {
|
|
||||||
shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
|
|
||||||
}
|
|
||||||
} else if (batches == 4) {
|
|
||||||
for (int c = 0; c < accum_depth; c += 16) {
|
|
||||||
for (int b = 0; b < 4; b++) {
|
|
||||||
const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
|
|
||||||
for (int j = 0; j < 16; j++) {
|
|
||||||
uint8_t src_val = *src_data_ptr++;
|
|
||||||
// Flip the sign bit, so that the kernel will only need to
|
|
||||||
// reinterpret these uint8_t values as int8_t, getting for free the
|
|
||||||
// subtraction of the zero_point value 128.
|
|
||||||
uint8_t dst_val = src_val ^ 0x80;
|
|
||||||
*shuffled_input_workspace_ptr++ = dst_val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
TFLITE_DCHECK(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Actual computation
|
|
||||||
if (batches == 1) {
|
|
||||||
int16_t* output_ptr = output_data;
|
|
||||||
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
|
|
||||||
// so that just reinterpreting them as int8_t values is equivalent to
|
|
||||||
// subtracting 128 from them, thus implementing for free the subtraction of
|
|
||||||
// the zero_point value 128.
|
|
||||||
const int8_t* shuffled_weights_ptr =
|
|
||||||
reinterpret_cast<const int8_t*>(shuffled_weights_data);
|
|
||||||
// Likewise, we preshuffled and pre-xored the input data above.
|
|
||||||
const int8_t* shuffled_input_data =
|
|
||||||
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
|
|
||||||
for (int c = 0; c < output_depth; c += 4) {
|
|
||||||
// Internal accumulation.
|
|
||||||
// Initialize accumulator with the bias-value.
|
|
||||||
int32_t accum[4] = {0};
|
|
||||||
// Accumulation loop.
|
|
||||||
for (int d = 0; d < accum_depth; d += 16) {
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
for (int j = 0; j < 16; j++) {
|
|
||||||
int8_t input_val = shuffled_input_data[d + j];
|
|
||||||
int8_t weights_val = *shuffled_weights_ptr++;
|
|
||||||
accum[i] += weights_val * input_val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
// Add bias value
|
|
||||||
int32_t acc = accum[i] + bias_data[c + i];
|
|
||||||
// Down-scale the final int32_t accumulator to the scale used by our
|
|
||||||
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
|
|
||||||
// multiplier and shift here have been pre-computed offline
|
|
||||||
// (e.g. by toco).
|
|
||||||
acc =
|
|
||||||
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
|
|
||||||
// Saturate, cast to int16_t, and store to output array.
|
|
||||||
acc = std::max(acc, output_activation_min);
|
|
||||||
acc = std::min(acc, output_activation_max);
|
|
||||||
output_ptr[c + i] = acc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (batches == 4) {
|
|
||||||
int16_t* output_ptr = output_data;
|
|
||||||
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
|
|
||||||
// so that just reinterpreting them as int8_t values is equivalent to
|
|
||||||
// subtracting 128 from them, thus implementing for free the subtraction of
|
|
||||||
// the zero_point value 128.
|
|
||||||
const int8_t* shuffled_weights_ptr =
|
|
||||||
reinterpret_cast<const int8_t*>(shuffled_weights_data);
|
|
||||||
// Likewise, we preshuffled and pre-xored the input data above.
|
|
||||||
const int8_t* shuffled_input_data =
|
|
||||||
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
|
|
||||||
for (int c = 0; c < output_depth; c += 4) {
|
|
||||||
const int8_t* shuffled_input_ptr = shuffled_input_data;
|
|
||||||
// Accumulation loop.
|
|
||||||
// Internal accumulation.
|
|
||||||
// Initialize accumulator with the bias-value.
|
|
||||||
int32_t accum[4][4];
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
for (int b = 0; b < 4; b++) {
|
|
||||||
accum[i][b] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int d = 0; d < accum_depth; d += 16) {
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
for (int b = 0; b < 4; b++) {
|
|
||||||
for (int j = 0; j < 16; j++) {
|
|
||||||
int8_t input_val = shuffled_input_ptr[16 * b + j];
|
|
||||||
int8_t weights_val = shuffled_weights_ptr[16 * i + j];
|
|
||||||
accum[i][b] += weights_val * input_val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
shuffled_input_ptr += 64;
|
|
||||||
shuffled_weights_ptr += 64;
|
|
||||||
}
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
for (int b = 0; b < 4; b++) {
|
|
||||||
// Add bias value
|
|
||||||
int32_t acc = accum[i][b] + bias_data[c + i];
|
|
||||||
// Down-scale the final int32_t accumulator to the scale used by our
|
|
||||||
// (16-bit, typically 3 integer bits) fixed-point format. The
|
|
||||||
// quantized multiplier and shift here have been pre-computed offline
|
|
||||||
// (e.g. by toco).
|
|
||||||
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
|
|
||||||
output_shift);
|
|
||||||
// Saturate, cast to int16_t, and store to output array.
|
|
||||||
acc = std::max(acc, output_activation_min);
|
|
||||||
acc = std::min(acc, output_activation_max);
|
|
||||||
output_ptr[b * output_depth + c + i] = acc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
TFLITE_DCHECK(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
|
|
||||||
@@ -1,168 +0,0 @@
|
|||||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_ops {
|
|
||||||
|
|
||||||
inline int16_t SaturatingLeftShift(int16_t value, int amount) {
|
|
||||||
int64_t result = static_cast<int64_t>(value) * (1 << amount);
|
|
||||||
result = std::min<int64_t>(result, std::numeric_limits<int16_t>::max());
|
|
||||||
result = std::max<int64_t>(result, std::numeric_limits<int16_t>::min());
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Similar to ARM instruction SQDMULH.
|
|
||||||
// Similar to gemmlowp::SaturatingRoundingDoublingHighMul except
|
|
||||||
// rounding to zero instead of to nearest (SQRDMULH).
|
|
||||||
inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) {
|
|
||||||
bool overflow = a == b && a == std::numeric_limits<std::int16_t>::min();
|
|
||||||
std::int32_t a_32(a);
|
|
||||||
std::int32_t b_32(b);
|
|
||||||
std::int32_t ab_32 = a_32 * b_32;
|
|
||||||
std::int16_t ab_x2_high16 = static_cast<std::int16_t>((ab_32) / (1 << 15));
|
|
||||||
return overflow ? std::numeric_limits<std::int16_t>::max() : ab_x2_high16;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline void HardSwish(const RuntimeShape& input_shape, const T* input_data,
|
|
||||||
const RuntimeShape& output_shape, T* output_data) {
|
|
||||||
ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float");
|
|
||||||
auto matching_size = MatchingFlatSize(input_shape, output_shape);
|
|
||||||
const T* in_end = input_data + matching_size;
|
|
||||||
for (; input_data < in_end; input_data++, output_data++) {
|
|
||||||
const float in = *input_data;
|
|
||||||
*output_data =
|
|
||||||
in * std::min(static_cast<T>(6), std::max(static_cast<T>(0), in + 3)) /
|
|
||||||
6;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline void HardSwish(const HardSwishParams& params,
|
|
||||||
const RuntimeShape& input_shape, const T* input_data,
|
|
||||||
const RuntimeShape& output_shape, T* output_data) {
|
|
||||||
ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized");
|
|
||||||
|
|
||||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
|
||||||
|
|
||||||
for (int i = 0; i < flat_size; i++) {
|
|
||||||
const int16_t input_value = input_data[i] - params.input_zero_point;
|
|
||||||
// Left-shift as much as we can without overflow/saturation to put
|
|
||||||
// significant bits in the high bits of our 16-bit fixedpoint values, so
|
|
||||||
// that fixed-point approximate computations below are as accurate as
|
|
||||||
// possible.
|
|
||||||
const int16_t input_value_on_hires_input_scale = input_value * (1 << 7);
|
|
||||||
// Compute the input value on essentially the output scale, just not
|
|
||||||
// right-shifted yet. This is the value that we'll use in the (x >= +3)
|
|
||||||
// case, and that in the general case we'll multiply against the "relu-ish"
|
|
||||||
// fixed-point multiplier in [0, 1].
|
|
||||||
const int16_t input_value_on_preshift_output_scale =
|
|
||||||
gemmlowp::SaturatingRoundingDoublingHighMul(
|
|
||||||
input_value_on_hires_input_scale,
|
|
||||||
params.output_multiplier_fixedpoint_int16);
|
|
||||||
// Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that
|
|
||||||
// is just an affine rescaling of x from [-3, 3] to [0, 1]. In the general
|
|
||||||
// case, it is just that plus saturation at the boundaries of [-3, 3].
|
|
||||||
// First, we rescale from [-3, 3] to [-1, 1], saturating.
|
|
||||||
// That is done by rescaling the input value with a fixed-point multiplier
|
|
||||||
// (reluish_multiplier_fixedpoint) and bit-shift such that we represent
|
|
||||||
// that input value on the scale where the real value 3.0f is represented
|
|
||||||
// by the quantized value 32768. (+32768 is actually not representable as
|
|
||||||
// int16_t, so this saturates at +32767, and that is seen empirically to be
|
|
||||||
// a negligible contribution to numerical error/bias).
|
|
||||||
//
|
|
||||||
// This code is careful to correctly implement any magnitude of multiplier,
|
|
||||||
// involving either a right shift or a left shift, with correct saturation
|
|
||||||
// behavior in the left-shift case. This forces this code to be more
|
|
||||||
// complicated, but is necessary for real applications: a partially
|
|
||||||
// trained quantized MobileNet v3-small model that motivated this code
|
|
||||||
// exhibits some large [min, max] range boundaries, of the order of
|
|
||||||
// magnitude of 10 or 100 depending on layers.
|
|
||||||
//
|
|
||||||
// The next few lines are basically just an ordinary
|
|
||||||
// MultiplyByQuantizedMultiplier, except that we are more careful here
|
|
||||||
// about the fine details of saturation when left-shifting, because here
|
|
||||||
// overflow in left-shift is a common case, not an anomaly as
|
|
||||||
// MultiplyByQuantizedMultiplier assumes.
|
|
||||||
int16_t reluish_value = input_value_on_hires_input_scale;
|
|
||||||
// Shift left, saturating, as much as we can while ensuring that this
|
|
||||||
// saturation will not contribute to the result. That is, left shift amount
|
|
||||||
// reduced by 1.
|
|
||||||
if (params.reluish_multiplier_exponent > 0) {
|
|
||||||
reluish_value = SaturatingLeftShift(
|
|
||||||
reluish_value, params.reluish_multiplier_exponent - 1);
|
|
||||||
}
|
|
||||||
// Apply the fixed-point multiplier, dividing the value by a divisor
|
|
||||||
// ranging in [1, 2].
|
|
||||||
reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul(
|
|
||||||
reluish_value, params.reluish_multiplier_fixedpoint_int16);
|
|
||||||
// Apply the last bit of left-shift. Thus, in the left-shifting case, if
|
|
||||||
// any saturation affects the result, it is happening here --- any
|
|
||||||
// saturation having occurred above is overwritten here, not affecting the
|
|
||||||
// result.
|
|
||||||
if (params.reluish_multiplier_exponent > 0) {
|
|
||||||
reluish_value = SaturatingLeftShift(reluish_value, 1);
|
|
||||||
}
|
|
||||||
// Shift right, in the right-shifting case.
|
|
||||||
if (params.reluish_multiplier_exponent < 0) {
|
|
||||||
reluish_value = gemmlowp::RoundingDivideByPOT(
|
|
||||||
reluish_value, -params.reluish_multiplier_exponent);
|
|
||||||
}
|
|
||||||
// At this point we have rescaled the value into a 16bit fixedpoint
|
|
||||||
// reluish_value in [-1, 1].
|
|
||||||
// We now convert that to a 16bit fixedpoint value in [0, 1].
|
|
||||||
reluish_value = (reluish_value + (1 << 15)) >> 1;
|
|
||||||
// Use of SaturatingDoublingHighMul here is important to cancel the biases
|
|
||||||
// from the above SaturatingRoundingDoublingHighMul.
|
|
||||||
//
|
|
||||||
// On a partially trained MobileNet-v3-small,
|
|
||||||
//
|
|
||||||
// | bias on | ImageNet
|
|
||||||
// | quantized | Top-1
|
|
||||||
// Operation used here | values | accuracy (50k)
|
|
||||||
// --------------------------------------+------------+-----------
|
|
||||||
// SaturatingDoublingHighMul | -0.0024 | 58.920
|
|
||||||
// SaturatingRoundingDoublingHighMul | -0.0067 | 58.064
|
|
||||||
//
|
|
||||||
// In activations_test, this is covered by this testcase:
|
|
||||||
// QuantizedActivationsOpTest.HardSwishBias
|
|
||||||
//
|
|
||||||
const int16_t preshift_output_value = SaturatingDoublingHighMul(
|
|
||||||
reluish_value, input_value_on_preshift_output_scale);
|
|
||||||
// We were so far operating on the pre-shift output scale. Now we finally
|
|
||||||
// apply that output shift, arriving at the final output scale.
|
|
||||||
int16_t output_value = gemmlowp::RoundingDivideByPOT(
|
|
||||||
preshift_output_value, -params.output_multiplier_exponent);
|
|
||||||
output_value += params.output_zero_point;
|
|
||||||
output_value =
|
|
||||||
std::min<int16_t>(output_value, std::numeric_limits<T>::max());
|
|
||||||
output_value =
|
|
||||||
std::max<int16_t>(output_value, std::numeric_limits<T>::min());
|
|
||||||
output_data[i] = output_value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
|
|
||||||
@@ -1,145 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <limits>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
#include "tensorflow/lite/kernels/internal/types.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
|
|
||||||
inline void CheckArithmeticParams(const ArithmeticParams& params) {
|
|
||||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
|
||||||
params.quantized_activation_max);
|
|
||||||
// Input offset is negative input zero point. Activation tensors are
|
|
||||||
// asymmetric quantized so they span the full int8 range.
|
|
||||||
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
|
|
||||||
TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
|
|
||||||
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
|
|
||||||
TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void ElementWise(
|
|
||||||
int size, const ArithmeticParams& params, const int8_t* input1_data,
|
|
||||||
const int8_t* input2_data, int8_t* output_data,
|
|
||||||
void (*check_arithmetic_params)(const ArithmeticParams&),
|
|
||||||
int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
|
|
||||||
CheckArithmeticParams(params);
|
|
||||||
for (int i = 0; i < size; ++i) {
|
|
||||||
output_data[i] = binary_func(input1_data[i], input2_data[i], params);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void BroadcastBinaryFunction4DSlow(
|
|
||||||
const ArithmeticParams& params, const RuntimeShape& input1_shape,
|
|
||||||
const int8_t* input1_data, const RuntimeShape& input2_shape,
|
|
||||||
const int8_t* input2_data, const RuntimeShape& output_shape,
|
|
||||||
int8_t* output_data,
|
|
||||||
void (*check_arithmetic_params)(const ArithmeticParams&),
|
|
||||||
int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
|
|
||||||
NdArrayDesc<4> desc1;
|
|
||||||
NdArrayDesc<4> desc2;
|
|
||||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
|
||||||
&desc2);
|
|
||||||
const RuntimeShape extended_output_shape =
|
|
||||||
RuntimeShape::ExtendedShape(4, output_shape);
|
|
||||||
|
|
||||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
|
||||||
// col, channel), with extents (batches, height, width, depth), with the
|
|
||||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
|
||||||
// typically 1 element).
|
|
||||||
//
|
|
||||||
// In generated C code, we store arrays with the dimensions reversed. The
|
|
||||||
// first dimension has smallest stride.
|
|
||||||
//
|
|
||||||
// We name our variables by their Tensorflow convention, but generate C code
|
|
||||||
// nesting loops such that the innermost loop has the smallest stride for the
|
|
||||||
// best cache behavior.
|
|
||||||
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
|
|
||||||
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
|
|
||||||
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
|
|
||||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
|
||||||
output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func(
|
|
||||||
input1_data[SubscriptToIndex(desc1, b, y, x, c)],
|
|
||||||
input2_data[SubscriptToIndex(desc2, b, y, x, c)], params);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) {
|
|
||||||
const int32_t input1_val = params.input1_offset + x;
|
|
||||||
const int32_t input2_val = params.input2_offset + y;
|
|
||||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
|
||||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
|
||||||
const int32_t scaled_input1_val =
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
|
||||||
const int32_t scaled_input2_val =
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
|
||||||
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
|
|
||||||
const int32_t raw_output =
|
|
||||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
|
||||||
raw_sum, params.output_multiplier, params.output_shift) +
|
|
||||||
params.output_offset;
|
|
||||||
const int32_t clamped_output =
|
|
||||||
std::min(params.quantized_activation_max,
|
|
||||||
std::max(params.quantized_activation_min, raw_output));
|
|
||||||
return static_cast<int8_t>(clamped_output);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Element-wise add that can often be used for inner loop of broadcast add as
|
|
||||||
// well as the non-broadcast add.
|
|
||||||
inline void AddElementwise(int size, const ArithmeticParams& params,
|
|
||||||
const int8_t* input1_data, const int8_t* input2_data,
|
|
||||||
int8_t* output_data) {
|
|
||||||
ElementWise(size, params, input1_data, input2_data, output_data,
|
|
||||||
CheckArithmeticParams, AddFunc);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Add(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& input1_shape, const int8_t* input1_data,
|
|
||||||
const RuntimeShape& input2_shape, const int8_t* input2_data,
|
|
||||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
|
||||||
CheckArithmeticParams(params);
|
|
||||||
|
|
||||||
const int flat_size =
|
|
||||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
|
||||||
|
|
||||||
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& input1_shape,
|
|
||||||
const int8_t* input1_data,
|
|
||||||
const RuntimeShape& input2_shape,
|
|
||||||
const int8_t* input2_data,
|
|
||||||
const RuntimeShape& output_shape,
|
|
||||||
int8_t* output_data) {
|
|
||||||
BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape,
|
|
||||||
input2_data, output_shape, output_data,
|
|
||||||
CheckArithmeticParams, AddFunc);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_integer_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
|
|
||||||
@@ -1,238 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
|
|
||||||
// Fixed-point per-channel-quantization convolution reference kernel.
|
|
||||||
inline void ConvPerChannel(
|
|
||||||
const ConvParams& params, const int32_t* output_multiplier,
|
|
||||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
|
||||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int8_t* output_data) {
|
|
||||||
// Get parameters.
|
|
||||||
const int32_t input_offset = params.input_offset; // r = s(q - Z)
|
|
||||||
const int stride_width = params.stride_width;
|
|
||||||
const int stride_height = params.stride_height;
|
|
||||||
const int dilation_width_factor = params.dilation_width_factor;
|
|
||||||
const int dilation_height_factor = params.dilation_height_factor;
|
|
||||||
const int pad_width = params.padding_values.width;
|
|
||||||
const int pad_height = params.padding_values.height;
|
|
||||||
const int32_t output_offset = params.output_offset;
|
|
||||||
|
|
||||||
// Set min and max value of the output.
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
|
|
||||||
// Consistency check.
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
|
||||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
|
||||||
const int input_depth = input_shape.Dims(3);
|
|
||||||
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
|
|
||||||
if (bias_data) {
|
|
||||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check dimensions of the tensors.
|
|
||||||
const int input_height = input_shape.Dims(1);
|
|
||||||
const int input_width = input_shape.Dims(2);
|
|
||||||
const int filter_height = filter_shape.Dims(1);
|
|
||||||
const int filter_width = filter_shape.Dims(2);
|
|
||||||
const int filter_input_depth = filter_shape.Dims(3);
|
|
||||||
const int groups = input_depth / filter_input_depth;
|
|
||||||
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
|
|
||||||
const int filters_per_group = output_depth / groups;
|
|
||||||
const int output_height = output_shape.Dims(1);
|
|
||||||
const int output_width = output_shape.Dims(2);
|
|
||||||
for (int batch = 0; batch < batches; ++batch) {
|
|
||||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
|
||||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
|
||||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
|
||||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
|
||||||
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
|
|
||||||
auto group = out_channel / filters_per_group;
|
|
||||||
int32_t acc = 0;
|
|
||||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
|
||||||
const int in_y = in_y_origin + dilation_height_factor * filter_y;
|
|
||||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
|
||||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
|
||||||
|
|
||||||
// Zero padding by omitting the areas outside the image.
|
|
||||||
const bool is_point_inside_image =
|
|
||||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
|
||||||
(in_y < input_height);
|
|
||||||
|
|
||||||
if (!is_point_inside_image) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int in_channel = 0; in_channel < filter_input_depth;
|
|
||||||
++in_channel) {
|
|
||||||
int32_t input_val =
|
|
||||||
input_data[Offset(input_shape, batch, in_y, in_x,
|
|
||||||
in_channel + group * filter_input_depth)];
|
|
||||||
int32_t filter_val = filter_data[Offset(
|
|
||||||
filter_shape, out_channel, filter_y, filter_x, in_channel)];
|
|
||||||
// Accumulate with 32 bits accumulator.
|
|
||||||
// In the nudging process during model quantization, we force
|
|
||||||
// real value of 0.0 be represented by a quantized value. This
|
|
||||||
// guarantees that the input_offset is a int8_t, even though
|
|
||||||
// it is represented using int32_t. int32_t += int8_t *
|
|
||||||
// (int8_t - int8_t) so the highest value we can get from each
|
|
||||||
// accumulation is [-127, 127] * ([-128, 127] -
|
|
||||||
// [-128, 127]), which is [-32512, 32512]. log2(32512)
|
|
||||||
// = 14.98, which means we can accumulate at least 2^16
|
|
||||||
// multiplications without overflow. The accumulator is
|
|
||||||
// applied to a filter so the accumulation logic will hold as
|
|
||||||
// long as the filter size (filter_y * filter_x * in_channel)
|
|
||||||
// does not exceed 2^16, which is the case in all the models
|
|
||||||
// we have seen so far.
|
|
||||||
// TODO(b/174275578): Add a check to make sure the
|
|
||||||
// accumulator depth is smaller than 2^16.
|
|
||||||
acc += filter_val * (input_val + input_offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bias_data) {
|
|
||||||
acc += bias_data[out_channel];
|
|
||||||
}
|
|
||||||
acc = MultiplyByQuantizedMultiplier(
|
|
||||||
acc, output_multiplier[out_channel], output_shift[out_channel]);
|
|
||||||
acc += output_offset;
|
|
||||||
acc = std::max(acc, output_activation_min);
|
|
||||||
acc = std::min(acc, output_activation_max);
|
|
||||||
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
|
|
||||||
static_cast<int8_t>(acc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}

// Fixed-point per-channel-quantization convolution reference kernel.
// 16-bit data and 8-bit filter
template <typename AccumScalar>
inline void ConvPerChannel(
    const ConvParams& params, const int32_t* output_multiplier,
    const int32_t* output_shift, const RuntimeShape& input_shape,
    const int16_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const AccumScalar* bias_data, const RuntimeShape& output_shape,
    int16_t* output_data) {
  // Get parameters.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;

  // Set min and max value of the output.
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;

  // Consistency check.
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = input_shape.Dims(3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }

  // Check dimensions of the tensors.
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int filter_input_depth = filter_shape.Dims(3);
  const int groups = input_depth / filter_input_depth;
  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
  const int filters_per_group = output_depth / groups;
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      const int in_y_origin = (out_y * stride_height) - pad_height;
      for (int out_x = 0; out_x < output_width; ++out_x) {
        const int in_x_origin = (out_x * stride_width) - pad_width;
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          auto group = out_channel / filters_per_group;
          AccumScalar acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            const int in_y = in_y_origin + dilation_height_factor * filter_y;
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              const int in_x = in_x_origin + dilation_width_factor * filter_x;

              // Zero padding by omitting the areas outside the image.
              const bool is_point_inside_image =
                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                  (in_y < input_height);

              if (!is_point_inside_image) {
                continue;
              }

              for (int in_channel = 0; in_channel < filter_input_depth;
                   ++in_channel) {
                int32_t input_val =
                    input_data[Offset(input_shape, batch, in_y, in_x,
                                      in_channel + group * filter_input_depth)];
                int32_t filter_val = filter_data[Offset(
                    filter_shape, out_channel, filter_y, filter_x, in_channel)];
                // Accumulate with 64 bits accumulator.
                // int64_t += int8_t * int16_t so the highest value we can
                // get from each accumulation is [-127, 127] * ([-32768,
                // 32767] -
                // [-32768, 32767]), which is [-8322945, 8322945].
                // log2(8322945) = 22.99.
                acc += filter_val * input_val;
              }
            }
          }
          if (bias_data) {
            acc += bias_data[out_channel];
          }
          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
              acc, output_multiplier[out_channel], output_shift[out_channel]);
          scaled_acc = std::max(scaled_acc, output_activation_min);
          scaled_acc = std::min(scaled_acc, output_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              static_cast<int16_t>(scaled_acc);
        }
      }
    }
  }
}
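
// Illustrative sketch, not part of the upstream TensorFlow sources: when the
// 16-bit path above is instantiated with AccumScalar = std::int64_t (as the
// 16-bit depthwise kernel further down does explicitly), even 2^16 worst-case
// multiply-accumulates stay near 2^39, far inside the accumulator's range.
// The names below are hypothetical, for illustration only.
namespace conv_int16_headroom_sketch {
constexpr int64_t kMaxPerMac = 127LL * 65535LL;      // 8322945, roughly 2^23
constexpr int64_t kWorstCaseSum = kMaxPerMac << 16;  // roughly 2^39
static_assert(kWorstCaseSum < (int64_t{1} << 62),
              "a 64-bit accumulator has ample headroom for 2^16 terms");
}  // namespace conv_int16_headroom_sketch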

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
@@ -1,291 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
inline void DepthwiseConvPerChannel(
|
|
||||||
const DepthwiseParams& params, const int32_t* output_multiplier,
|
|
||||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
|
||||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int8_t* output_data) {
|
|
||||||
// Get parameters.
|
|
||||||
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
|
|
||||||
const int stride_width = params.stride_width;
|
|
||||||
const int stride_height = params.stride_height;
|
|
||||||
const int dilation_width_factor = params.dilation_width_factor;
|
|
||||||
const int dilation_height_factor = params.dilation_height_factor;
|
|
||||||
const int pad_width = params.padding_values.width;
|
|
||||||
const int pad_height = params.padding_values.height;
|
|
||||||
const int depth_multiplier = params.depth_multiplier;
|
|
||||||
const int32_t input_offset = params.input_offset;
|
|
||||||
const int32_t output_offset = params.output_offset;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
|
|
||||||
// Check dimensions of the tensors.
|
|
||||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
|
||||||
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
|
|
||||||
const int input_height = input_shape.Dims(1);
|
|
||||||
const int input_width = input_shape.Dims(2);
|
|
||||||
const int input_depth = input_shape.Dims(3);
|
|
||||||
const int filter_height = filter_shape.Dims(1);
|
|
||||||
const int filter_width = filter_shape.Dims(2);
|
|
||||||
const int output_height = output_shape.Dims(1);
|
|
||||||
const int output_width = output_shape.Dims(2);
|
|
||||||
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
|
|
||||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
|
||||||
|
|
||||||
for (int batch = 0; batch < batches; ++batch) {
|
|
||||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
|
||||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
|
||||||
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
|
|
||||||
for (int m = 0; m < depth_multiplier; ++m) {
|
|
||||||
const int output_channel = m + in_channel * depth_multiplier;
|
|
||||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
|
||||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
|
||||||
int32_t acc = 0;
|
|
||||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
|
||||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
|
||||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
|
||||||
const int in_y =
|
|
||||||
in_y_origin + dilation_height_factor * filter_y;
|
|
||||||
// Zero padding by omitting the areas outside the image.
|
|
||||||
const bool is_point_inside_image =
|
|
||||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
|
||||||
(in_y < input_height);
|
|
||||||
if (is_point_inside_image) {
|
|
||||||
int32_t input_val = input_data[Offset(
|
|
||||||
input_shape, batch, in_y, in_x, in_channel)];
|
|
||||||
int32_t filter_val = filter_data[Offset(
|
|
||||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
|
||||||
// Accumulate with 32 bits accumulator.
|
|
||||||
// In the nudging process during model quantization, we force
|
|
||||||
// real value of 0.0 be represented by a quantized value. This
|
|
||||||
// guarantees that the input_offset is a int8_t, even though
|
|
||||||
// it is represented using int32_t. int32_t += int8_t *
|
|
||||||
// (int8_t - int8_t) so the highest value we can get from each
|
|
||||||
// accumulation is [-127, 127] * ([-128, 127] -
|
|
||||||
// [-128, 127]), which is [-32512, 32512]. log2(32512)
|
|
||||||
// = 14.98, which means we can accumulate at least 2^16
|
|
||||||
// multiplications without overflow. The accumulator is
|
|
||||||
// applied to a filter so the accumulation logic will hold as
|
|
||||||
// long as the filter size (filter_y * filter_x * in_channel)
|
|
||||||
// does not exceed 2^16, which is the case in all the models
|
|
||||||
// we have seen so far.
|
|
||||||
// TODO(b/174275578): Add a check to make sure the
|
|
||||||
// accumulator depth is smaller than 2^16.
|
|
||||||
acc += filter_val * (input_val + input_offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
            }
            if (bias_data) {
              acc += bias_data[output_channel];
            }
            acc = MultiplyByQuantizedMultiplier(
                acc, output_multiplier[output_channel],
                output_shift[output_channel]);
            acc += output_offset;
            acc = std::max(acc, output_activation_min);
            acc = std::min(acc, output_activation_max);
            output_data[Offset(output_shape, batch, out_y, out_x,
                               output_channel)] = static_cast<int8_t>(acc);
          }
        }
      }
    }
  }
}
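
// Illustrative sketch, not part of the upstream TensorFlow sources: the
// depthwise mapping used above, output_channel = m + in_channel *
// depth_multiplier, spelled out for input_depth = 2 and depth_multiplier = 3
// (output channels 0..5 come from input channels 0,0,0,1,1,1). The names are
// hypothetical and exist only for this example.
namespace depthwise_channel_map_sketch {
constexpr int kDepthMultiplier = 3;
constexpr int OutputChannel(int in_channel, int m) {
  return m + in_channel * kDepthMultiplier;
}
static_assert(OutputChannel(0, 2) == 2, "third filter of input channel 0");
static_assert(OutputChannel(1, 0) == 3, "first filter of input channel 1");
}  // namespace depthwise_channel_map_sketch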
|
|
||||||
|
|
||||||
inline void DepthwiseConvPerChannel(
|
|
||||||
const DepthwiseParams& params, const int32_t* output_multiplier,
|
|
||||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
|
||||||
const int16_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const std::int64_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int16_t* output_data) {
|
|
||||||
// Get parameters.
|
|
||||||
const int stride_width = params.stride_width;
|
|
||||||
const int stride_height = params.stride_height;
|
|
||||||
const int dilation_width_factor = params.dilation_width_factor;
|
|
||||||
const int dilation_height_factor = params.dilation_height_factor;
|
|
||||||
const int pad_width = params.padding_values.width;
|
|
||||||
const int pad_height = params.padding_values.height;
|
|
||||||
const int depth_multiplier = params.depth_multiplier;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
|
|
||||||
// Check dimensions of the tensors.
|
|
||||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
|
||||||
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
|
|
||||||
const int input_height = input_shape.Dims(1);
|
|
||||||
const int input_width = input_shape.Dims(2);
|
|
||||||
const int input_depth = input_shape.Dims(3);
|
|
||||||
const int filter_height = filter_shape.Dims(1);
|
|
||||||
const int filter_width = filter_shape.Dims(2);
|
|
||||||
const int output_height = output_shape.Dims(1);
|
|
||||||
const int output_width = output_shape.Dims(2);
|
|
||||||
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
|
|
||||||
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
|
|
||||||
|
|
||||||
for (int batch = 0; batch < batches; ++batch) {
|
|
||||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
|
||||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
|
||||||
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
|
|
||||||
for (int m = 0; m < depth_multiplier; ++m) {
|
|
||||||
const int output_channel = m + in_channel * depth_multiplier;
|
|
||||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
|
||||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
|
||||||
std::int64_t acc = 0;
|
|
||||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
|
||||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
|
||||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
|
||||||
const int in_y =
|
|
||||||
in_y_origin + dilation_height_factor * filter_y;
|
|
||||||
// Zero padding by omitting the areas outside the image.
|
|
||||||
const bool is_point_inside_image =
|
|
||||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
|
||||||
(in_y < input_height);
|
|
||||||
if (is_point_inside_image) {
|
|
||||||
int32_t input_val = input_data[Offset(
|
|
||||||
input_shape, batch, in_y, in_x, in_channel)];
|
|
||||||
int32_t filter_val = filter_data[Offset(
|
|
||||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
|
||||||
// Accumulate with 64 bits accumulator.
|
|
||||||
// We assume maximum of 2^16 accumulations as with the 8-bit
|
|
||||||
// case so actually the value in the accumulator should not
|
|
||||||
// exceed 40 bits
|
|
||||||
acc += static_cast<int64_t>(filter_val) *
|
|
||||||
static_cast<int64_t>(input_val);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (bias_data) {
|
|
||||||
acc += bias_data[output_channel];
|
|
||||||
}
|
|
||||||
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
|
|
||||||
acc, output_multiplier[output_channel],
|
|
||||||
output_shift[output_channel]);
|
|
||||||
scaled_acc = std::max(scaled_acc, output_activation_min);
|
|
||||||
scaled_acc = std::min(scaled_acc, output_activation_max);
|
|
||||||
output_data[Offset(output_shape, batch, out_y, out_x,
|
|
||||||
output_channel)] =
|
|
||||||
static_cast<int16_t>(scaled_acc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void DepthwiseConvHybridPerChannel(
|
|
||||||
const DepthwiseParams& params, float* scaling_factors_ptr,
|
|
||||||
const RuntimeShape& input_shape, const int8_t* input_data,
|
|
||||||
const RuntimeShape& filter_shape, const int8_t* filter_data,
|
|
||||||
const RuntimeShape& bias_shape, const float* bias_data,
|
|
||||||
const RuntimeShape& output_shape, float* output_data,
|
|
||||||
const float* per_channel_scale, int32_t* input_offset) {
|
|
||||||
const int stride_width = params.stride_width;
|
|
||||||
const int stride_height = params.stride_height;
|
|
||||||
const int dilation_width_factor = params.dilation_width_factor;
|
|
||||||
const int dilation_height_factor = params.dilation_height_factor;
|
|
||||||
const int pad_width = params.padding_values.width;
|
|
||||||
const int pad_height = params.padding_values.height;
|
|
||||||
const int depth_multiplier = params.depth_multiplier;
|
|
||||||
const float output_activation_min = params.float_activation_min;
|
|
||||||
const float output_activation_max = params.float_activation_max;
|
|
||||||
// Check dimensions of the tensors.
|
|
||||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
|
||||||
|
|
||||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
|
||||||
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
|
|
||||||
const int input_height = input_shape.Dims(1);
|
|
||||||
const int input_width = input_shape.Dims(2);
|
|
||||||
const int input_depth = input_shape.Dims(3);
|
|
||||||
const int filter_height = filter_shape.Dims(1);
|
|
||||||
const int filter_width = filter_shape.Dims(2);
|
|
||||||
const int output_height = output_shape.Dims(1);
|
|
||||||
const int output_width = output_shape.Dims(2);
|
|
||||||
const int bias_depth = bias_shape.FlatSize();
|
|
||||||
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
|
|
||||||
TFLITE_DCHECK_EQ(bias_depth, output_depth);
|
|
||||||
|
|
||||||
for (int batch = 0; batch < batches; ++batch) {
|
|
||||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
|
||||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
|
||||||
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
|
|
||||||
for (int m = 0; m < depth_multiplier; ++m) {
|
|
||||||
const int output_channel = m + in_channel * depth_multiplier;
|
|
||||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
|
||||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
|
||||||
int32_t acc = 0;
|
|
||||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
|
||||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
|
||||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
|
||||||
const int in_y =
|
|
||||||
in_y_origin + dilation_height_factor * filter_y;
|
|
||||||
// Zero padding by omitting the areas outside the image.
|
|
||||||
const bool is_point_inside_image =
|
|
||||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
|
||||||
(in_y < input_height);
|
|
||||||
if (is_point_inside_image) {
|
|
||||||
int32_t input_val = input_data[Offset(
|
|
||||||
input_shape, batch, in_y, in_x, in_channel)];
|
|
||||||
int32_t filter_val = filter_data[Offset(
|
|
||||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
|
||||||
acc += filter_val * (input_val - input_offset[batch]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
float acc_float = static_cast<float>(acc);
|
|
||||||
acc_float *=
|
|
||||||
per_channel_scale[output_channel] * scaling_factors_ptr[batch];
|
|
||||||
if (bias_data && output_channel < bias_depth) {
|
|
||||||
acc_float += bias_data[output_channel];
|
|
||||||
}
|
|
||||||
output_data[Offset(output_shape, batch, out_y, out_x,
|
|
||||||
output_channel)] =
|
|
||||||
ActivationFunctionWithMinMax(acc_float, output_activation_min,
|
|
||||||
output_activation_max);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_integer_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
|
|
||||||
@@ -1,201 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
|
|
||||||
// For per-channel functions, since it is defined in quantization spec that
// weights are symmetric
// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric),
// zero_point (params.weights_offset) is always 0.
// However, for per-tensor functions, params.weights_offset is still applied for
// backward compatibility.

inline void FullyConnectedPerChannel(
    const FullyConnectedParams& params, const int32_t* output_multiplier,
    const int* output_shift, const RuntimeShape& input_shape,
    const int8_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const int32_t* bias_data, const RuntimeShape& output_shape,
    int8_t* output_data) {
  const int32_t input_offset = params.input_offset;
  const int32_t output_offset = params.output_offset;
  const int32_t output_activation_min = params.quantized_activation_min;
  const int32_t output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int batches = output_shape.Dims(0);
  const int output_depth = output_shape.Dims(1);
  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      int32_t acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        int32_t input_val = input_data[b * accum_depth + d];
        int32_t filter_val = filter_data[out_c * accum_depth + d];
        acc += filter_val * (input_val + input_offset);
      }
      if (bias_data) {
        acc += bias_data[out_c];
      }
      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
                                          output_shift[out_c]);
      acc += output_offset;
      acc = std::max(acc, output_activation_min);
      acc = std::min(acc, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
    }
  }
}
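
// Illustrative sketch, not part of the upstream TensorFlow sources: the flat
// index out_c * accum_depth + d used above treats the filter as a row-major
// [output_depth, accum_depth] matrix, so out_c picks a row and d walks along
// it. The names below are hypothetical, for illustration only.
namespace fully_connected_index_sketch {
constexpr int kAccumDepth = 4;
constexpr int FlatIndex(int out_c, int d) { return out_c * kAccumDepth + d; }
static_assert(FlatIndex(0, 3) == 3, "last weight of the first output row");
static_assert(FlatIndex(2, 0) == 8, "first weight of the third output row");
}  // namespace fully_connected_index_sketch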
|
|
||||||
|
|
||||||
template <typename AccumScalar>
|
|
||||||
inline void FullyConnectedPerChannel(
|
|
||||||
const FullyConnectedParams& params, const int32_t* output_multiplier,
|
|
||||||
const int* output_shift, const RuntimeShape& input_shape,
|
|
||||||
const int16_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const AccumScalar* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int16_t* output_data) {
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
|
||||||
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
|
||||||
const int output_dim_count = output_shape.DimensionsCount();
|
|
||||||
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
|
|
||||||
const int output_depth = output_shape.Dims(output_dim_count - 1);
|
|
||||||
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
|
|
||||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
|
||||||
for (int b = 0; b < batches; ++b) {
|
|
||||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
|
||||||
AccumScalar acc = 0;
|
|
||||||
for (int d = 0; d < accum_depth; ++d) {
|
|
||||||
int32_t input_val = input_data[b * accum_depth + d];
|
|
||||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
|
||||||
acc += filter_val * input_val;
|
|
||||||
}
|
|
||||||
if (bias_data) {
|
|
||||||
acc += bias_data[out_c];
|
|
||||||
}
|
|
||||||
int32_t acc_scaled = MultiplyByQuantizedMultiplier(
|
|
||||||
acc, output_multiplier[out_c], output_shift[out_c]);
|
|
||||||
acc_scaled = std::max(acc_scaled, output_activation_min);
|
|
||||||
acc_scaled = std::min(acc_scaled, output_activation_max);
|
|
||||||
output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void FullyConnected(
|
|
||||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
|
||||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int8_t* output_data) {
|
|
||||||
const int32_t input_offset = params.input_offset;
|
|
||||||
const int32_t filter_offset = params.weights_offset;
|
|
||||||
const int32_t output_offset = params.output_offset;
|
|
||||||
const int32_t output_multiplier = params.output_multiplier;
|
|
||||||
const int output_shift = params.output_shift;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
|
||||||
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
|
||||||
const int output_dim_count = output_shape.DimensionsCount();
|
|
||||||
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
|
|
||||||
const int output_depth = output_shape.Dims(output_dim_count - 1);
|
|
||||||
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
|
|
||||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
|
||||||
for (int b = 0; b < batches; ++b) {
|
|
||||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
|
||||||
int32_t acc = 0;
|
|
||||||
for (int d = 0; d < accum_depth; ++d) {
|
|
||||||
int32_t input_val = input_data[b * accum_depth + d];
|
|
||||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
|
||||||
acc += (filter_val + filter_offset) * (input_val + input_offset);
|
|
||||||
}
|
|
||||||
if (bias_data) {
|
|
||||||
acc += bias_data[out_c];
|
|
||||||
}
|
|
||||||
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
|
|
||||||
acc += output_offset;
|
|
||||||
acc = std::max(acc, output_activation_min);
|
|
||||||
acc = std::min(acc, output_activation_max);
|
|
||||||
output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename AccumScalar>
|
|
||||||
inline void FullyConnected(
|
|
||||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
|
||||||
const int16_t* input_data, const RuntimeShape& filter_shape,
|
|
||||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
|
||||||
const AccumScalar* bias_data, const RuntimeShape& output_shape,
|
|
||||||
int16_t* output_data) {
|
|
||||||
const int32_t filter_offset = params.weights_offset;
|
|
||||||
const int32_t output_multiplier = params.output_multiplier;
|
|
||||||
const int output_shift = params.output_shift;
|
|
||||||
const int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
const int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
|
||||||
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
|
|
||||||
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
|
||||||
const int output_dim_count = output_shape.DimensionsCount();
|
|
||||||
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
|
|
||||||
const int output_depth = output_shape.Dims(output_dim_count - 1);
|
|
||||||
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
|
|
||||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
|
||||||
for (int b = 0; b < batches; ++b) {
|
|
||||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
|
||||||
AccumScalar acc = 0;
|
|
||||||
for (int d = 0; d < accum_depth; ++d) {
|
|
||||||
int32_t input_val = input_data[b * accum_depth + d];
|
|
||||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
|
||||||
acc += (filter_val + filter_offset) * input_val;
|
|
||||||
}
|
|
||||||
if (bias_data) {
|
|
||||||
acc += bias_data[out_c];
|
|
||||||
}
|
|
||||||
int32_t acc_scaled =
|
|
||||||
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
|
|
||||||
acc_scaled = std::max(acc_scaled, output_activation_min);
|
|
||||||
acc_scaled = std::min(acc_scaled, output_activation_max);
|
|
||||||
output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_integer_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
|
|
||||||
inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
                            int32_t depth, const int8_t* input_data,
                            int8_t* output_data) {
  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
  // The output scale must be in sync with Prepare().
  // Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
  // to [-1, 127/128].
  static constexpr int32_t kOutputScale = 7;
  for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
    // int32_t = (int8_t - int8_t) ^ 2.
    // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
    // safe from overflowing in at least 2^16 steps.
    int32_t acc = 0;
    for (int inner_index = 0; inner_index < depth; ++inner_index) {
      int32_t input =
          input_data[depth * outer_index + inner_index] - input_zero_point;
      acc += input * input;
    }
    int32_t inv_l2norm_multiplier;
    int inv_l2norm_shift;
    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
                                     &inv_l2norm_shift);

    for (int inner_index = 0; inner_index < depth; ++inner_index) {
      int32_t input =
          input_data[depth * outer_index + inner_index] - input_zero_point;

      // Rescale and downcast. Rescale is folded into the division.
      int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
          input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
      output_in_q24 =
          std::min(static_cast<int32_t>(kMaxInt8),
                   std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
      output_data[depth * outer_index + inner_index] =
          static_cast<int8_t>(output_in_q24);
    }
  }
}
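
// Illustrative sketch, not part of the upstream TensorFlow sources: with the
// fixed 1/128 output scale noted above, the real value carried by an output
// byte is value / 128, so the representable range is [-1, 127/128]. The
// helper below is hypothetical and exists only for this example.
namespace l2norm_scale_sketch {
constexpr float Dequantize(int8_t v) { return static_cast<float>(v) / 128.0f; }
static_assert(Dequantize(-128) == -1.0f, "lower end of the nudged range");
static_assert(Dequantize(64) == 0.5f, "64 maps back to 0.5");
}  // namespace l2norm_scale_sketch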
|
|
||||||
} // namespace reference_integer_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
|
|
||||||
@@ -1,121 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <limits>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
|
|
||||||
inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
|
|
||||||
int32_t input_multiplier, int32_t input_left_shift,
|
|
||||||
int32_t input_size, const int8_t* input_data,
|
|
||||||
int8_t* output_data) {
|
|
||||||
// Integer bits must be in sync with Prepare() function.
|
|
||||||
static constexpr int32_t kInputIntegerBits = 4;
|
|
||||||
static constexpr int32_t kOutputIntegerBits = 8;
|
|
||||||
static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
|
|
||||||
static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
|
|
||||||
static constexpr int32_t kOutputZeroPoint = -128;
|
|
||||||
|
|
||||||
for (int i = 0; i < input_size; ++i) {
|
|
||||||
const int32_t input =
|
|
||||||
static_cast<int32_t>(input_data[i]) - input_zero_point;
|
|
||||||
if (input <= -input_range_radius) {
|
|
||||||
output_data[i] = kMinInt8;
|
|
||||||
} else if (input >= input_range_radius) {
|
|
||||||
output_data[i] = kMaxInt8;
|
|
||||||
} else {
|
|
||||||
const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
|
|
||||||
input, input_multiplier, input_left_shift);
|
|
||||||
using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
|
|
||||||
const int32_t output_in_q0 =
|
|
||||||
gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
|
|
||||||
|
|
||||||
// Rescale and downcast.
|
|
||||||
using gemmlowp::RoundingDivideByPOT;
|
|
||||||
int32_t output_in_q23 =
|
|
||||||
RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
|
|
||||||
output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
|
|
||||||
static_cast<int32_t>(kMinInt8)),
|
|
||||||
static_cast<int32_t>(kMaxInt8));
|
|
||||||
output_data[i] = static_cast<int8_t>(output_in_q23);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Logistic(int32_t input_multiplier, int32_t input_left_shift,
|
|
||||||
int32_t input_size, const int16_t* ptr_input_data,
|
|
||||||
int16_t* ptr_output_data) {
|
|
||||||
// We use the LUT for sigmoid and take into account, that
|
|
||||||
// tanh(x) = 2*sigmoid(2*x) - 1
|
|
||||||
|
|
||||||
// We scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
|
|
||||||
// In case of general parameter scale, multiplier 3 is taken into account
|
|
||||||
// in TanhPrepare function and it is included in
|
|
||||||
// input_multiplier already.
|
|
||||||
|
|
||||||
TFLITE_DCHECK_GE(input_left_shift, 0);
|
|
||||||
if (input_multiplier == 0) { // power of two case
|
|
||||||
input_multiplier = 3 << input_left_shift;
|
|
||||||
input_left_shift = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
|
|
||||||
int32_t input_data =
|
|
||||||
((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
|
|
||||||
|
|
||||||
// We do interpolation on unsigned values.
|
|
||||||
uint32_t abs_input_data = abs(input_data);
|
|
||||||
|
|
||||||
// We divide by 2 power of 9, because
|
|
||||||
// we need to divide by 2 in power of 7 for
|
|
||||||
// the input conversion + 1/4 from the scale above.
|
|
||||||
|
|
||||||
// Define uh as uint32_t type not to make this function overflow.
|
|
||||||
uint32_t uh = abs_input_data >> 9;
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
if (uh >= 255) {
|
|
||||||
// Saturate to maximum.
|
|
||||||
result = 0x7FFF << 10;
|
|
||||||
} else {
|
|
||||||
uint32_t ua = sigmoid_table_uint16[uh];
|
|
||||||
uint32_t ub = sigmoid_table_uint16[uh + 1];
|
|
||||||
uint32_t ut = abs_input_data & 0x1ff;
|
|
||||||
// Interpolation is done using the fractional bit.
|
|
||||||
result = (ua << 9) + ut * (ub - ua);
|
|
||||||
}
|
|
||||||
|
|
||||||
result = (input_data >= 0) ? (result + (1 << 9))
|
|
||||||
: ((1 << (16 + 9)) - result + (1 << 9) - 1);
|
|
||||||
|
|
||||||
// Back to 16-bit.
|
|
||||||
result >>= 10;
|
|
||||||
|
|
||||||
*ptr_output_data = result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_integer_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
|
|
||||||
template <typename integer_type>
inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
                 int32_t shift, const RuntimeShape& unextended_input_shape,
                 const integer_type* input_data, int32_t input_zero_point,
                 const RuntimeShape& unextended_output_shape,
                 integer_type* output_data, int32_t output_zero_point) {
  // Current implementation only supports dimension equals 4 and simultaneous
  // reduction over width and height.
  TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape =
      RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);
  const int output_batch = output_shape.Dims(0);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int output_depth = output_shape.Dims(3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int num_elements_in_axis = input_width * input_height;

  TFLITE_CHECK_EQ(op_params.axis_count, 2);
  TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
               (op_params.axis[0] == 2 && op_params.axis[1] == 1));
  TFLITE_CHECK_EQ(output_height, 1);
  TFLITE_CHECK_EQ(output_width, 1);

  static constexpr int32_t kMinInt = std::numeric_limits<integer_type>::min();
  static constexpr int32_t kMaxInt = std::numeric_limits<integer_type>::max();

  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_d = 0; out_d < output_depth; ++out_d) {
      int32_t acc = 0;
      for (int in_h = 0; in_h < input_height; ++in_h) {
        for (int in_w = 0; in_w < input_width; ++in_w) {
          acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] -
                 input_zero_point;
        }
      }
      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
      acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
                    : (acc - num_elements_in_axis / 2) / num_elements_in_axis;
      acc += output_zero_point;
      acc = std::min(std::max(acc, kMinInt), kMaxInt);
      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
          static_cast<integer_type>(acc);
    }
  }
}
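
// Illustrative sketch, not part of the upstream TensorFlow sources: the
// (acc +/- n/2) / n step above is round-half-away-from-zero integer division.
// The helper below is hypothetical, for illustration only.
namespace mean_rounding_sketch {
constexpr int32_t RoundedDiv(int32_t sum, int32_t n) {
  return sum > 0 ? (sum + n / 2) / n : (sum - n / 2) / n;
}
static_assert(RoundedDiv(6, 4) == 2, "1.5 rounds away from zero to 2");
static_assert(RoundedDiv(-6, 4) == -2, "-1.5 rounds away from zero to -2");
}  // namespace mean_rounding_sketch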
|
|
||||||
|
|
||||||
} // namespace reference_integer_ops
|
|
||||||
} // namespace tflite
|
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
|
|
||||||
@@ -1,133 +0,0 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "fixedpoint/fixedpoint.h"
|
|
||||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
|
|
||||||
template <typename T>
inline void MulElementwise(int size, const ArithmeticParams& params,
                           const T* input1_data, const T* input2_data,
                           T* output_data) {
  for (int i = 0; i < size; ++i) {
    const int32_t input1_val = params.input1_offset + input1_data[i];
    const int32_t input2_val = params.input2_offset + input2_data[i];
    const int32_t unclamped_result =
        params.output_offset +
        MultiplyByQuantizedMultiplier(input1_val * input2_val,
                                      params.output_multiplier,
                                      params.output_shift);
    const int32_t clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, unclamped_result));
    output_data[i] = static_cast<T>(clamped_output);
  }
}
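
// Illustrative sketch, not part of the upstream TensorFlow sources: the final
// min/max above is a plain saturation into the quantized activation range,
// e.g. an unclamped value of 300 is stored as 127 for an int8 output. The
// helper below is hypothetical, for illustration only.
namespace mul_clamp_sketch {
constexpr int32_t Clamp(int32_t v, int32_t lo, int32_t hi) {
  return v < lo ? lo : (v > hi ? hi : v);
}
static_assert(Clamp(300, -128, 127) == 127, "saturates at the top of int8");
static_assert(Clamp(-300, -128, 127) == -128, "saturates at the bottom");
}  // namespace mul_clamp_sketch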
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline void Mul(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& input1_shape, const T* input1_data,
|
|
||||||
const RuntimeShape& input2_shape, const T* input2_data,
|
|
||||||
const RuntimeShape& output_shape, T* output_data) {
|
|
||||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
|
||||||
params.quantized_activation_max);
|
|
||||||
ruy::profiler::ScopeLabel label("Mul/8bit");
|
|
||||||
const int flat_size =
|
|
||||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
|
||||||
|
|
||||||
MulElementwise(flat_size, params, input1_data, input2_data, output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mul with 16 bit inputs and int8_t outputs.
|
|
||||||
inline void Mul(const ArithmeticParams& params,
|
|
||||||
const RuntimeShape& input1_shape, const int16_t* input1_data,
|
|
||||||
const RuntimeShape& input2_shape, const int16_t* input2_data,
|
|
||||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
|
||||||
ruy::profiler::ScopeLabel label("Mul/Int16Int8");
|
|
||||||
int32_t output_offset = params.output_offset;
|
|
||||||
int32_t output_activation_min = params.quantized_activation_min;
|
|
||||||
int32_t output_activation_max = params.quantized_activation_max;
|
|
||||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
|
||||||
|
|
||||||
const int flat_size =
|
|
||||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
|
||||||
|
|
||||||
for (int i = 0; i < flat_size; i++) {
|
|
||||||
// F0 uses 0 integer bits, range [-1, 1].
|
|
||||||
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
|
|
||||||
|
|
||||||
F0 unclamped_result =
|
|
||||||
F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
|
|
||||||
int16_t rescaled_result =
|
|
||||||
gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
|
|
||||||
int16_t clamped_result = std::min<int16_t>(
|
|
||||||
output_activation_max - output_offset, rescaled_result);
|
|
||||||
clamped_result = std::max<int16_t>(output_activation_min - output_offset,
|
|
||||||
clamped_result);
|
|
||||||
output_data[i] = output_offset + clamped_result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline void BroadcastMul4DSlow(
|
|
||||||
const ArithmeticParams& params, const RuntimeShape& input1_shape,
|
|
||||||
const T* input1_data, const RuntimeShape& input2_shape,
|
|
||||||
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
|
|
||||||
ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");
|
|
||||||
|
|
||||||
NdArrayDesc<4> desc1;
|
|
||||||
NdArrayDesc<4> desc2;
|
|
||||||
// The input shapes are extended as part of NdArrayDesc initialization.
|
|
||||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
|
||||||
&desc2);
|
|
||||||
const RuntimeShape extended_output_shape =
|
|
||||||
RuntimeShape::ExtendedShape(4, output_shape);
|
|
||||||
|
|
||||||
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
|
|
||||||
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
|
|
||||||
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
|
|
||||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
|
||||||
const int32_t input1_val =
|
|
||||||
params.input1_offset +
|
|
||||||
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
|
|
||||||
const int32_t input2_val =
|
|
||||||
params.input2_offset +
|
|
||||||
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
|
|
||||||
const int32_t unclamped_result =
|
|
||||||
params.output_offset +
|
|
||||||
MultiplyByQuantizedMultiplier(input1_val * input2_val,
|
|
||||||
params.output_multiplier,
|
|
||||||
params.output_shift);
|
|
||||||
const int32_t clamped_output = std::min(
|
|
||||||
params.quantized_activation_max,
|
|
||||||
std::max(params.quantized_activation_min, unclamped_result));
|
|
||||||
output_data[Offset(extended_output_shape, b, y, x, c)] =
|
|
||||||
static_cast<T>(clamped_output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace reference_integer_ops
|
|
||||||
} // namespace tflite
|
|
||||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
|
|
||||||
@@ -1,264 +0,0 @@
|
|||||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
==============================================================================*/
|
|
||||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
|
|
||||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <limits>
|
|
||||||
|
|
||||||
#include "tensorflow/lite/kernels/internal/common.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
|
||||||
namespace reference_integer_ops {
|
|
||||||
|
|
||||||
inline bool AveragePool(const PoolParams& params,
|
|
||||||
const RuntimeShape& input_shape,
|
|
||||||
const int8_t* input_data,
|
|
||||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
|
||||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
|
||||||
params.quantized_activation_max);
|
|
||||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
|
||||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
|
|
||||||
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
|
|
||||||
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
|
|
||||||
const int input_height = input_shape.Dims(1);
|
|
||||||
const int input_width = input_shape.Dims(2);
|
|
||||||
const int output_height = output_shape.Dims(1);
|
|
||||||
const int output_width = output_shape.Dims(2);
|
|
||||||
const int stride_height = params.stride_height;
|
|
||||||
const int stride_width = params.stride_width;
|
|
||||||
for (int batch = 0; batch < batches; ++batch) {
|
|
||||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
|
||||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
|
||||||
for (int channel = 0; channel < depth; ++channel) {
|
|
||||||
const int in_x_origin =
|
|
||||||
(out_x * stride_width) - params.padding_values.width;
|
|
||||||
const int in_y_origin =
|
|
||||||
(out_y * stride_height) - params.padding_values.height;
|
|
||||||
// Compute the boundaries of the filter region clamped so as to
|
|
||||||
// ensure that the filter window fits in the input array.
|
|
||||||
const int filter_x_start = std::max(0, -in_x_origin);
|
|
||||||
const int filter_x_end =
|
|
||||||
std::min(params.filter_width, input_width - in_x_origin);
|
|
||||||
const int filter_y_start = std::max(0, -in_y_origin);
|
|
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int32_t acc = 0;
          int filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              acc +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          if (filter_count == 0) return false;
          // Round to the closest integer value.
          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                        : (acc - filter_count / 2) / filter_count;
          acc = std::max(acc, params.quantized_activation_min);
          acc = std::min(acc, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int8_t>(acc);
        }
      }
    }
  }
  return true;
}

inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const int8_t* input_data, const RuntimeShape& output_shape,
                    int8_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_GE(params.quantized_activation_min,
                   std::numeric_limits<int8_t>::min());
  TFLITE_DCHECK_LE(params.quantized_activation_max,
                   std::numeric_limits<int8_t>::max());
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int8_t max = std::numeric_limits<int8_t>::lowest();
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          max = std::max<int8_t>(max, params.quantized_activation_min);
          max = std::min<int8_t>(max, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int8_t>(max);
        }
      }
    }
  }
}
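// Illustrative sketch (hypothetical values, plain standard C++; not part of
// the original header): how the clamped filter window above behaves for a
// padded edge pixel, assuming a 3-tap filter, stride 1 and padding 1.
#include <algorithm>
#include <cstdio>

int main() {
  const int input_width = 5, filter_width = 3, pad = 1, stride = 1;
  for (int out_x = 0; out_x < 5; ++out_x) {
    const int in_x_origin = out_x * stride - pad;
    const int start = std::max(0, -in_x_origin);
    const int end = std::min(filter_width, input_width - in_x_origin);
    // At the two borders only 2 of the 3 taps fall inside the input.
    std::printf("out_x=%d taps=%d\n", out_x, end - start);
  }
  return 0;
}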

inline bool AveragePool(const PoolParams& params,
                        const RuntimeShape& input_shape,
                        const int16_t* input_data,
                        const RuntimeShape& output_shape,
                        int16_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int32_t acc = 0;
          int filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              acc +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          if (filter_count == 0) return false;
          // Round to the closest integer value.
          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                        : (acc - filter_count / 2) / filter_count;
          acc = std::max(acc, params.quantized_activation_min);
          acc = std::min(acc, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int16_t>(acc);
        }
      }
    }
  }
  return true;
}

inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const int16_t* input_data, const RuntimeShape& output_shape,
                    int16_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_GE(params.quantized_activation_min,
                   std::numeric_limits<int16_t>::min());
  TFLITE_DCHECK_LE(params.quantized_activation_max,
                   std::numeric_limits<int16_t>::max());
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int16_t max = std::numeric_limits<int16_t>::lowest();
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          max = std::max<int16_t>(max, params.quantized_activation_min);
          max = std::min<int16_t>(max, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int16_t>(max);
        }
      }
    }
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
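// Illustrative sketch (hypothetical values, plain standard C++; not part of
// the original header): the rounding division used by the integer
// AveragePool kernels above. Adding half the divisor before dividing rounds
// the mean to the nearest integer for either sign of the accumulator.
#include <cstdio>

int main() {
  const int filter_count = 4;
  for (int acc : {7, 6, -6, -7}) {
    const int avg = acc > 0 ? (acc + filter_count / 2) / filter_count
                            : (acc - filter_count / 2) / filter_count;
    std::printf("acc=%+d avg=%+d\n", acc, avg);  // 7->2, 6->2, -6->-2, -7->-2
  }
  return 0;
}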
@@ -1,117 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_

#include <algorithm>
#include <limits>

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
                 int32_t input_multiplier, int32_t input_shift,
                 const RuntimeShape& input_shape, const int8_t* input_data,
                 const RuntimeShape& output_shape, int8_t* output_data) {
  // Integer bits must be in sync with Prepare() function.
  static constexpr int32_t kInputIntegerBits = 4;
  static constexpr int32_t kOutputScale = 7;
  static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
  static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
  using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;

  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; ++i) {
    const int32_t input =
        static_cast<int32_t>(input_data[i]) - input_zero_point;
    if (input <= -input_range_radius) {
      output_data[i] = kMinInt8;
    } else if (input >= input_range_radius) {
      output_data[i] = kMaxInt8;
    } else {
      const int32_t input_in_q4 =
          MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
      const int32_t output_in_q0 =
          gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();

      // Rescale and downcast.
      using gemmlowp::RoundingDivideByPOT;
      int32_t output_in_q24 =
          RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
      output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
      output_data[i] = static_cast<int8_t>(output_in_q24);
    }
  }
}

inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
                 const RuntimeShape& input_shape, const int16_t* ptr_input_data,
                 const RuntimeShape& output_shape, int16_t* ptr_output_data) {
  // We use the LUT for sigmoid and take into account, that
  // tanh(x) = 2*sigmoid(2*x) - 1

  // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
  // In case of general parameter scale, multiplier 3 is taken into account
  // in TanhPrepare function and it is included in
  // input_multiplier already.

  if (input_multiplier == 0) {  // power of two case
    input_multiplier = 3 << input_left_shift;
    input_left_shift = 0;
  }

  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;

  int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) {
    int32_t input_data =
        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;

    uint32_t abs_input_data = abs(input_data);
    uint32_t uh = abs_input_data >> 8;
    int32_t result;

    if (uh >= 255) {
      // Saturate to maximum.
      result = 0xFFFF << 8;
    } else {
      uint32_t ua = sigmoid_table_uint16[uh];
      uint32_t ub = sigmoid_table_uint16[uh + 1];

      uint8_t ut = abs_input_data & 0xFF;

      result = (ua << 8) + ut * (ub - ua);
    }

    result = (input_data >= 0)
                 ? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
                 : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);

    // Convert back to 16-bit.
    result >>= (9 - 1);

    *ptr_output_data = result;
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
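// Illustrative sketch (plain standard C++; not part of the original header):
// the identity the int16 kernel above relies on, tanh(x) = 2*sigmoid(2*x) - 1,
// checked here in ordinary floating point instead of the fixed-point LUT.
#include <cmath>
#include <cstdio>

int main() {
  for (float x : {-2.0f, -0.5f, 0.0f, 0.5f, 2.0f}) {
    const float via_sigmoid = 2.0f / (1.0f + std::exp(-2.0f * x)) - 1.0f;
    std::printf("x=%+.2f tanh=%+.6f 2*sigmoid(2x)-1=%+.6f\n", x, std::tanh(x),
                via_sigmoid);
  }
  return 0;
}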
@@ -1,224 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_

#include <algorithm>

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

// Fixed-point per-channel-quantization transpose convolution reference kernel.
inline void TransposeConv(
    const ConvParams& params, const int32_t* output_multiplier,
    const int32_t* output_shift, const RuntimeShape& input_shape,
    const int8_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const int32_t* bias_data, const RuntimeShape& output_shape,
    int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
    int32_t* scratch_buffer) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.

  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int32_t input_offset = params.input_offset;
  const int32_t output_offset = params.output_offset;
  const int32_t output_activation_min = std::numeric_limits<int8_t>::min();
  const int32_t output_activation_max = std::numeric_limits<int8_t>::max();
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

  const int num_elements = output_shape.FlatSize();
  // We need to initialize scratch_buffer to all 0s, as we apply the same
  // 'scatter' based trick as in float version.
  memset(scratch_buffer, 0, num_elements * sizeof(int32_t));

  // Loop through input elements one at a time.
  for (int batch = 0; batch < batches; ++batch) {
    for (int in_y = 0; in_y < input_height; ++in_y) {
      for (int in_x = 0; in_x < input_width; ++in_x) {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
          // Loop through the output elements it will influence.
          const int out_x_origin = (in_x * stride_width) - pad_width;
          const int out_y_origin = (in_y * stride_height) - pad_height;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int out_channel = 0; out_channel < output_depth;
                   ++out_channel) {
                // Compute output element location.
                const int out_x = out_x_origin + filter_x;
                const int out_y = out_y_origin + filter_y;
                // We cannot accumulate out of bounds.
                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
                    (out_y < output_height)) {
                  const int8_t input_value = input_data[Offset(
                      input_shape, batch, in_y, in_x, in_channel)];
                  const int8_t filter_value =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
                                        out_channel)] +=
                      (input_value + input_offset) * filter_value;
                }
              }
            }
          }
        }
      }
    }
  }

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
                                              out_channel)];
          if (bias_data) {
            acc += bias_data[out_channel];
          }
          acc = MultiplyByQuantizedMultiplier(
              acc, output_multiplier[out_channel], output_shift[out_channel]);
          acc += output_offset;
          acc = std::max(acc, output_activation_min);
          acc = std::min(acc, output_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              static_cast<int8_t>(acc);
        }
      }
    }
  }
}

// int16_t input (zero_point=0), int8_t filter, int32 or int64 accumulator
template <typename Scalar>
inline void TransposeConv(
    const ConvParams& params, const int32_t* output_multiplier,
    const int32_t* output_shift, const RuntimeShape& input_shape,
    const int16_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const Scalar* bias_data, const RuntimeShape& output_shape,
    int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
    Scalar* scratch_buffer) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.

  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int32_t output_activation_min = std::numeric_limits<int16_t>::min();
  const int32_t output_activation_max = std::numeric_limits<int16_t>::max();
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

  const int num_elements = output_shape.FlatSize();
  // We need to initialize scratch_buffer to all 0s, as we apply the same
  // 'scatter' based trick as in float version.
  memset(scratch_buffer, 0, num_elements * sizeof(Scalar));

  // Loop through input elements one at a time.
  for (int batch = 0; batch < batches; ++batch) {
    for (int in_y = 0; in_y < input_height; ++in_y) {
      for (int in_x = 0; in_x < input_width; ++in_x) {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
          // Loop through the output elements it will influence.
          const int out_x_origin = (in_x * stride_width) - pad_width;
          const int out_y_origin = (in_y * stride_height) - pad_height;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int out_channel = 0; out_channel < output_depth;
                   ++out_channel) {
                // Compute output element location.
                const int out_x = out_x_origin + filter_x;
                const int out_y = out_y_origin + filter_y;
                // We cannot accumulate out of bounds.
                if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
                    (out_y < output_height)) {
                  const int32_t input_value = input_data[Offset(
                      input_shape, batch, in_y, in_x, in_channel)];
                  const int32_t filter_value =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  scratch_buffer[Offset(output_shape, batch, out_y, out_x,
                                        out_channel)] +=
                      input_value * filter_value;
                }
              }
            }
          }
        }
      }
    }
  }

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          Scalar acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
                                             out_channel)];
          if (bias_data) {
            acc += bias_data[out_channel];
          }
          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
              acc, output_multiplier[out_channel], output_shift[out_channel]);
          scaled_acc = std::max(scaled_acc, output_activation_min);
          scaled_acc = std::min(scaled_acc, output_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              static_cast<int16_t>(scaled_acc);
        }
      }
    }
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
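// Illustrative 1-D sketch (hypothetical values, plain standard C++; not part
// of the original header) of the 'scatter' trick used above: every input
// element adds its contribution to all output positions its filter taps
// touch, and the accumulator is post-processed afterwards.
#include <cstdio>

int main() {
  const int input[3] = {1, 2, 3};
  const int filter[2] = {10, 1};  // hypothetical 2-tap filter
  const int stride = 2;
  int scratch[6] = {0};  // output length = input length * stride
  for (int in_x = 0; in_x < 3; ++in_x) {
    const int out_x_origin = in_x * stride;
    for (int tap = 0; tap < 2; ++tap) {
      scratch[out_x_origin + tap] += input[in_x] * filter[tap];
    }
  }
  for (int out_x = 0; out_x < 6; ++out_x) std::printf("%d ", scratch[out_x]);
  std::printf("\n");  // prints: 10 1 20 2 30 3
  return 0;
}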
@@ -1,90 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_

#include <algorithm>
#include <cmath>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
                            const RuntimeShape& input_shape,
                            const float* input_data,
                            const RuntimeShape& output_shape,
                            float* output_data, float epsilon = 1e-6) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
  for (int i = 0; i < outer_size; ++i) {
    float squared_l2_norm = 0;
    for (int c = 0; c < depth; ++c) {
      const float val = input_data[depth * i + c];
      squared_l2_norm += val * val;
    }
    float l2_norm = std::sqrt(squared_l2_norm);
    l2_norm = std::max(l2_norm, epsilon);
    for (int c = 0; c < depth; ++c) {
      output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
    }
  }
}

inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
                            const RuntimeShape& input_shape,
                            const uint8_t* input_data,
                            const RuntimeShape& output_shape,
                            uint8_t* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int32_t input_zero_point = op_params.input_zero_point;

  for (int i = 0; i < outer_size; ++i) {
    int32_t square_l2_norm = 0;
    for (int c = 0; c < depth; c++) {
      int32_t diff = input_data[depth * i + c] - input_zero_point;
      square_l2_norm += diff * diff;
    }
    int32_t inv_l2norm_multiplier;
    int inv_l2norm_shift;
    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
    for (int c = 0; c < depth; c++) {
      int32_t diff = input_data[depth * i + c] - input_zero_point;
      int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
      int32_t unclamped_output_val = 128 + rescaled_diff;
      int32_t output_val =
          std::min(static_cast<int32_t>(255),
                   std::max(static_cast<int32_t>(0), unclamped_output_val));
      output_data[depth * i + c] = static_cast<uint8_t>(output_val);
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite
#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
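// Illustrative sketch (hypothetical values, plain standard C++; not part of
// the original header) of what the float L2Normalization above computes along
// the last axis: divide each value by the vector's Euclidean norm, guarded by
// a small epsilon.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float input[4] = {3.0f, 4.0f, 0.0f, 0.0f};
  float squared = 0.0f;
  for (float v : input) squared += v * v;
  const float norm = std::max(std::sqrt(squared), 1e-6f);
  for (float v : input) std::printf("%.2f ", v / norm);  // 0.60 0.80 0.00 0.00
  std::printf("\n");
  return 0;
}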
@@ -1,69 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_

#include <algorithm>
#include <limits>

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_ops {

inline void LeakyRelu(const tflite::LeakyReluParams& params,
                      const RuntimeShape& input_shape, const float* input_data,
                      const RuntimeShape& output_shape, float* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    const float val = input_data[i];
    // Note that alpha might be > 1 or < 0, so we don't use std::max here.
    output_data[i] = val > 0 ? val : val * params.alpha;
  }
}

template <typename T>
inline void QuantizeLeakyRelu(const LeakyReluParams& params,
                              const RuntimeShape& input_shape,
                              const T* input_data,
                              const RuntimeShape& output_shape,
                              T* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  static const int32_t quantized_min = std::numeric_limits<T>::min();
  static const int32_t quantized_max = std::numeric_limits<T>::max();
  for (int i = 0; i < flat_size; ++i) {
    const int32_t input_value = input_data[i] - params.input_offset;
    int32_t unclamped_output;
    if (input_value >= 0) {
      unclamped_output = params.output_offset +
                         MultiplyByQuantizedMultiplier(
                             input_value, params.output_multiplier_identity,
                             params.output_shift_identity);
    } else {
      unclamped_output = params.output_offset +
                         MultiplyByQuantizedMultiplier(
                             input_value, params.output_multiplier_alpha,
                             params.output_shift_alpha);
    }
    const T clamped_output =
        std::min(quantized_max, std::max(quantized_min, unclamped_output));
    output_data[i] = static_cast<T>(clamped_output);
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
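// Illustrative sketch (hypothetical alpha, plain standard C++; not part of
// the original header) of the LeakyRelu rule used by the float kernel above.
#include <cstdio>

int main() {
  const float alpha = 0.1f;  // assumed example slope for negative inputs
  for (float val : {-2.0f, -0.5f, 0.0f, 1.5f}) {
    const float out = val > 0 ? val : val * alpha;  // same rule as the kernel
    std::printf("in=%+.2f out=%+.2f\n", val, out);
  }
  return 0;
}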
@@ -1,256 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_

#include <algorithm>
#include <cstddef>
#include <limits>

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_ops {

inline void LogSoftmax(const SoftmaxParams& params,
                       const RuntimeShape& input_shape, const float* input_data,
                       const RuntimeShape& output_shape, float* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i) {
    // Find max element value which we'll use to ensure numerical stability
    // taking advantage of the following equality:
    // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
    float max = std::numeric_limits<float>::lowest();
    for (int c = 0; c < depth; ++c) {
      max = std::max(max, input_data[i * depth + c]);
    }

    // Compute sum.
    float sum = 0.f;
    for (int c = 0; c < depth; ++c) {
      sum += std::exp(input_data[i * depth + c] - max);
    }

    // Compute result.
    const float log_sum = std::log(sum);
    for (int c = 0; c < depth; ++c) {
      output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
    }
  }
}
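// Illustrative sketch (hypothetical values, plain standard C++; not part of
// the original header) of the max-shift identity used above: subtracting the
// row max leaves the log-softmax result unchanged while keeping exp() in a
// safe range.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float x[3] = {1000.0f, 1001.0f, 1002.0f};  // naive exp() would overflow
  const float max = std::max({x[0], x[1], x[2]});
  float sum = 0.0f;
  for (float v : x) sum += std::exp(v - max);
  const float log_sum = std::log(sum);
  for (float v : x) std::printf("%.4f ", v - max - log_sum);
  std::printf("\n");  // finite values whose exponentials sum to 1
  return 0;
}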

inline void LogSoftmax(const SoftmaxParams& params,
                       const RuntimeShape& input_shape,
                       const uint8_t* input_data,
                       const RuntimeShape& output_shape, uint8_t* output_data) {
  const int32_t input_multiplier = params.input_multiplier;
  const int32_t input_left_shift = params.input_left_shift;
  const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
  const int32_t reverse_scaling_right_shift =
      params.reverse_scaling_right_shift;
  const int diff_min = params.diff_min;
  // The representation chosen for the input to the exp() function is Q5.26.
  // We need to leave extra space since values that we skip might be as large
  // as -32 before multiplying by input_beta_multiplier, and therefore as
  // large as -16 afterwards. Note that exp(-8) is definitely not
  // insignificant to accumulation, but exp(-16) definitely is.
  static constexpr int kScaledDiffIntegerBits = 5;
  static constexpr int kAccumulationIntegerBits = 12;
  static constexpr int kOutputIntegerBits = 4;
  using FixedPointScaledDiff =
      gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
  using FixedPointAccum =
      gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;

  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i) {
    uint8_t max_in_row = 0;
    for (int c = 0; c < depth; ++c) {
      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
    }

    FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
    for (int c = 0; c < depth; ++c) {
      int32_t input_diff =
          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
      if (input_diff >= diff_min) {
        const int32_t input_diff_rescaled =
            MultiplyByQuantizedMultiplierGreaterThanOne(
                input_diff, input_multiplier, input_left_shift);
        const FixedPointScaledDiff scaled_diff_f8 =
            FixedPointScaledDiff::FromRaw(input_diff_rescaled);
        sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
                                        exp_on_negative_values(scaled_diff_f8));
      }
    }

    const int32_t fixed_log_sum_of_exps =
        log_x_for_x_greater_than_or_equal_to_1<kScaledDiffIntegerBits>(
            sum_of_exps)
            .raw();

    // rescaled_diff_min is smallest representable in
    // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the
    // log-sub-exps that will be subtracted in the loop.
    //
    // The thresholds diff_min, etc are negative.
    const int rescaled_diff_min =
        fixed_log_sum_of_exps + std::numeric_limits<int32_t>::lowest();
    const int adjusted_diff_min =
        std::max(static_cast<int32_t>(
                     diff_min - 1),  // Note use of > below instead of >= above.
                 MultiplyByQuantizedMultiplierSmallerThanOneExp(
                     rescaled_diff_min, reverse_scaling_divisor,
                     -reverse_scaling_right_shift));

    for (int c = 0; c < depth; ++c) {
      int32_t input_diff =
          static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
      if (input_diff > adjusted_diff_min) {
        const int32_t input_diff_rescaled =
            MultiplyByQuantizedMultiplierGreaterThanOne(
                input_diff, input_multiplier, input_left_shift);
        int32_t unsat_output =
            gemmlowp::RoundingDivideByPOT(
                (input_diff_rescaled - fixed_log_sum_of_exps),
                31 - kScaledDiffIntegerBits - kOutputIntegerBits) +
            255;

        output_data[i * depth + c] = static_cast<uint8_t>(
            std::max(std::min(unsat_output, static_cast<int32_t>(255)),
                     static_cast<int32_t>(0)));
      } else {
        // Set output to smallest value.
        output_data[i * depth + c] = 0;
      }
    }
  }
}

template <typename T>
inline void LogSoftmaxQuantized(const SoftmaxParams& params,
                                const size_t outer_size, const size_t depth,
                                const RuntimeShape& input_shape,
                                const T* input_data,
                                const RuntimeShape& output_shape,
                                T* output_data) {
  const int32_t input_multiplier = params.input_multiplier;
  const int32_t input_left_shift = params.input_left_shift;
  const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
  const int32_t reverse_scaling_right_shift =
      params.reverse_scaling_right_shift;
  const int diff_min = params.diff_min;

  static constexpr T kMinT8 = std::numeric_limits<T>::min();
  static constexpr T kMaxT8 = std::numeric_limits<T>::max();
  static constexpr int32_t kMinInt32 = std::numeric_limits<int32_t>::min();

  // All IntegerBits must agree with Prepare function.
  // Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible.
  static constexpr int kInputIntegerBits = 5;
  static constexpr int kAccumulationIntegerBits = 12;
  static constexpr int kOutputIntegerBits = 4;
  using F5 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
  using F12 = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;

  for (size_t outer_index = 0; outer_index < outer_size; ++outer_index) {
    T max_in_row = kMinT8;
    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
      max_in_row =
          std::max(max_in_row, input_data[outer_index * depth + inner_index]);
    }

    // Accumulator "sum_of_exps_in_q12" is safe from overflowing in 2^12 steps.
    F12 sum_of_exps_in_q12 = F12::FromRaw(0);
    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
      int32_t input_diff =
          static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
          max_in_row;
      if (input_diff >= diff_min) {
        const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
            input_diff, input_multiplier, input_left_shift);
        sum_of_exps_in_q12 =
            sum_of_exps_in_q12 +
            gemmlowp::Rescale<kAccumulationIntegerBits>(
                exp_on_negative_values(F5::FromRaw(input_diff_in_q5)));
      }
    }

    const int32_t log_sum_of_exps_in_q5 =
        log_x_for_x_greater_than_or_equal_to_1<kInputIntegerBits>(
            sum_of_exps_in_q12)
            .raw();

    // Potentially reduced the valid range. shifted_log_sum_of_exps_in_q5 is
    // smallest representable in Q5.26 plus the log_sum_of_exps.
    const int32_t shifted_log_sum_of_exps_in_q5 =
        log_sum_of_exps_in_q5 + kMinInt32;
    const int32_t adjusted_diff_min =
        std::max(static_cast<int32_t>(diff_min - 1),
                 MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5,
                                               reverse_scaling_divisor,
                                               -reverse_scaling_right_shift));

    for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
      int32_t input_diff =
          static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
          max_in_row;
      // Note use of > below instead of >= above.
      if (input_diff > adjusted_diff_min) {
        const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
            input_diff, input_multiplier, input_left_shift);

        // Rescale and downcast.
        int32_t output_in_q27 =
            gemmlowp::RoundingDivideByPOT(
                (input_diff_in_q5 - log_sum_of_exps_in_q5),
                31 - kInputIntegerBits - kOutputIntegerBits) +
            kMaxT8;

        output_in_q27 =
            std::max(std::min(output_in_q27, static_cast<int32_t>(kMaxT8)),
                     static_cast<int32_t>(kMinT8));
        output_data[outer_index * depth + inner_index] =
            static_cast<T>(output_in_q27);
      } else {
        output_data[outer_index * depth + inner_index] = kMinT8;
      }
    }
  }
}

inline void LogSoftmax(const SoftmaxParams& params, const size_t outer_size,
                       const size_t depth, const RuntimeShape& input_shape,
                       const int8_t* input_data,
                       const RuntimeShape& output_shape, int8_t* output_data) {
  LogSoftmaxQuantized(params, outer_size, depth, input_shape, input_data,
                      output_shape, output_data);
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
Some files were not shown because too many files have changed in this diff.