removed tflite-lib

This commit is contained in:
CaCO3
2022-09-23 21:29:17 +02:00
parent f6f70776d9
commit b1d4df4309
373 changed files with 0 additions and 90368 deletions

View File

@@ -1,72 +0,0 @@
## TODO: GLOB is not a good way to collect files. Use explicit file list instead
cmake_minimum_required(VERSION 3.5)
set(tflite_dir "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow/lite")
set(tfmicro_dir "${tflite_dir}/micro")
set(tfmicro_frontend_dir "${tflite_dir}/experimental/microfrontend/lib")
set(tfmicro_kernels_dir "${tfmicro_dir}/kernels")
file(GLOB srcs_micro
"${tfmicro_dir}/*.cc"
"${tfmicro_dir}/*.c")
file(GLOB src_micro_frontend
"${tfmicro_frontend_dir}/*.c"
"${tfmicro_frontend_dir}/*.cc")
file(GLOB srcs_kernels
"${tfmicro_kernels_dir}/*.c"
"${tfmicro_kernels_dir}/*.cc")
# remove sources which will be provided by esp_nn
list(REMOVE_ITEM srcs_kernels
"${tfmicro_kernels_dir}/add.cc"
"${tfmicro_kernels_dir}/conv.cc"
"${tfmicro_kernels_dir}/depthwise_conv.cc"
"${tfmicro_kernels_dir}/fully_connected.cc"
"${tfmicro_kernels_dir}/mul.cc"
"${tfmicro_kernels_dir}/pooling.cc"
"${tfmicro_kernels_dir}/softmax.cc")
FILE(GLOB esp_nn_kernels
"${tfmicro_kernels_dir}/esp_nn/*.cc")
set(lib_srcs
"${srcs_micro}"
"${srcs_kernels}"
"${esp_nn_kernels}"
"${src_micro_frontend}"
"${tflite_dir}/kernels/kernel_util.cc"
"${tflite_dir}/micro/memory_planner/greedy_memory_planner.cc"
"${tflite_dir}/micro/memory_planner/linear_memory_planner.cc"
"${tflite_dir}/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc"
"${tflite_dir}/micro/arena_allocator/persistent_arena_buffer_allocator.cc"
"${tflite_dir}/micro/arena_allocator/recording_single_arena_buffer_allocator.cc"
"${tflite_dir}/micro/arena_allocator/single_arena_buffer_allocator.cc"
"${tflite_dir}/c/common.cc"
"${tflite_dir}/core/api/error_reporter.cc"
"${tflite_dir}/core/api/flatbuffer_conversions.cc"
"${tflite_dir}/core/api/op_resolver.cc"
"${tflite_dir}/core/api/tensor_utils.cc"
"${tflite_dir}/kernels/internal/quantization_util.cc"
"${tflite_dir}/schema/schema_utils.cc")
idf_component_register(
SRCS "${lib_srcs}"
INCLUDE_DIRS "." "third_party/gemmlowp"
"third_party/flatbuffers/include"
"third_party/ruy"
"third_party/kissfft"
REQUIRES "esp-nn")
# Reduce the level of paranoia to be able to compile TF sources
target_compile_options(${COMPONENT_LIB} PRIVATE
-Wno-maybe-uninitialized
-Wno-missing-field-initializers
-DESP_NN # enables ESP-NN optimizations by Espressif
-Wno-type-limits)
target_compile_options(${COMPONENT_LIB} PRIVATE -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -Wno-nonnull)
target_compile_options(${COMPONENT_LIB} PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Werror -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -Wno-return-type -Wno-strict-aliasing -std=gnu++14 >)
target_compile_options(${COMPONENT_LIB} INTERFACE $<$<IN_LIST:-DTF_LITE_STATIC_MEMORY,$<TARGET_PROPERTY:${COMPONENT_LIB},COMPILE_OPTIONS>>:-DTF_LITE_STATIC_MEMORY>)
target_link_libraries(${COMPONENT_LIB} PRIVATE -lm)
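For context, a consuming ESP-IDF application drives the interpreter built from these sources roughly as sketched below. This is a minimal sketch, not part of the removed files: `g_model_data`, the arena size, and the three registered ops are placeholder assumptions, and the MicroInterpreter constructor signature can differ slightly between TFLM releases.
#include <cstdint>
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"
extern const unsigned char g_model_data[];  // hypothetical: the model flatbuffer
constexpr int kArenaSize = 20 * 1024;       // assumed arena size
static uint8_t tensor_arena[kArenaSize];
void RunInference() {
  const tflite::Model* model = tflite::GetModel(g_model_data);
  tflite::MicroMutableOpResolver<3> resolver;  // ops assumed to be in the model
  resolver.AddConv2D();
  resolver.AddFullyConnected();
  resolver.AddSoftmax();
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena, kArenaSize);
  if (interpreter.AllocateTensors() != kTfLiteOk) return;
  TfLiteTensor* input = interpreter.input(0);
  (void)input;  // ... fill input->data.int8 (or data.f) with application data ...
  if (interpreter.Invoke() == kTfLiteOk) {
    TfLiteTensor* output = interpreter.output(0);
    (void)output;  // read results from output->data
  }
}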

View File

@@ -1,22 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Compatibility shim for new location of interface definitions.
#ifndef TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
#define TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#endif // TENSORFLOW_LITE_BUILTIN_OP_DATA_H_

View File

@@ -1,193 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_BUILTIN_OPS_H_
#define TENSORFLOW_LITE_BUILTIN_OPS_H_
// DO NOT EDIT MANUALLY: This file is automatically generated by
// `schema/builtin_ops_header/generator.cc`.
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// The enum for builtin operators.
// Note: CUSTOM, DELEGATE, and PLACEHOLDER_FOR_GREATER_OP_CODES are 3 special
// ops which are not real built-in ops.
typedef enum {
kTfLiteBuiltinAdd = 0,
kTfLiteBuiltinAveragePool2d = 1,
kTfLiteBuiltinConcatenation = 2,
kTfLiteBuiltinConv2d = 3,
kTfLiteBuiltinDepthwiseConv2d = 4,
kTfLiteBuiltinDepthToSpace = 5,
kTfLiteBuiltinDequantize = 6,
kTfLiteBuiltinEmbeddingLookup = 7,
kTfLiteBuiltinFloor = 8,
kTfLiteBuiltinFullyConnected = 9,
kTfLiteBuiltinHashtableLookup = 10,
kTfLiteBuiltinL2Normalization = 11,
kTfLiteBuiltinL2Pool2d = 12,
kTfLiteBuiltinLocalResponseNormalization = 13,
kTfLiteBuiltinLogistic = 14,
kTfLiteBuiltinLshProjection = 15,
kTfLiteBuiltinLstm = 16,
kTfLiteBuiltinMaxPool2d = 17,
kTfLiteBuiltinMul = 18,
kTfLiteBuiltinRelu = 19,
kTfLiteBuiltinReluN1To1 = 20,
kTfLiteBuiltinRelu6 = 21,
kTfLiteBuiltinReshape = 22,
kTfLiteBuiltinResizeBilinear = 23,
kTfLiteBuiltinRnn = 24,
kTfLiteBuiltinSoftmax = 25,
kTfLiteBuiltinSpaceToDepth = 26,
kTfLiteBuiltinSvdf = 27,
kTfLiteBuiltinTanh = 28,
kTfLiteBuiltinConcatEmbeddings = 29,
kTfLiteBuiltinSkipGram = 30,
kTfLiteBuiltinCall = 31,
kTfLiteBuiltinCustom = 32,
kTfLiteBuiltinEmbeddingLookupSparse = 33,
kTfLiteBuiltinPad = 34,
kTfLiteBuiltinUnidirectionalSequenceRnn = 35,
kTfLiteBuiltinGather = 36,
kTfLiteBuiltinBatchToSpaceNd = 37,
kTfLiteBuiltinSpaceToBatchNd = 38,
kTfLiteBuiltinTranspose = 39,
kTfLiteBuiltinMean = 40,
kTfLiteBuiltinSub = 41,
kTfLiteBuiltinDiv = 42,
kTfLiteBuiltinSqueeze = 43,
kTfLiteBuiltinUnidirectionalSequenceLstm = 44,
kTfLiteBuiltinStridedSlice = 45,
kTfLiteBuiltinBidirectionalSequenceRnn = 46,
kTfLiteBuiltinExp = 47,
kTfLiteBuiltinTopkV2 = 48,
kTfLiteBuiltinSplit = 49,
kTfLiteBuiltinLogSoftmax = 50,
kTfLiteBuiltinDelegate = 51,
kTfLiteBuiltinBidirectionalSequenceLstm = 52,
kTfLiteBuiltinCast = 53,
kTfLiteBuiltinPrelu = 54,
kTfLiteBuiltinMaximum = 55,
kTfLiteBuiltinArgMax = 56,
kTfLiteBuiltinMinimum = 57,
kTfLiteBuiltinLess = 58,
kTfLiteBuiltinNeg = 59,
kTfLiteBuiltinPadv2 = 60,
kTfLiteBuiltinGreater = 61,
kTfLiteBuiltinGreaterEqual = 62,
kTfLiteBuiltinLessEqual = 63,
kTfLiteBuiltinSelect = 64,
kTfLiteBuiltinSlice = 65,
kTfLiteBuiltinSin = 66,
kTfLiteBuiltinTransposeConv = 67,
kTfLiteBuiltinSparseToDense = 68,
kTfLiteBuiltinTile = 69,
kTfLiteBuiltinExpandDims = 70,
kTfLiteBuiltinEqual = 71,
kTfLiteBuiltinNotEqual = 72,
kTfLiteBuiltinLog = 73,
kTfLiteBuiltinSum = 74,
kTfLiteBuiltinSqrt = 75,
kTfLiteBuiltinRsqrt = 76,
kTfLiteBuiltinShape = 77,
kTfLiteBuiltinPow = 78,
kTfLiteBuiltinArgMin = 79,
kTfLiteBuiltinFakeQuant = 80,
kTfLiteBuiltinReduceProd = 81,
kTfLiteBuiltinReduceMax = 82,
kTfLiteBuiltinPack = 83,
kTfLiteBuiltinLogicalOr = 84,
kTfLiteBuiltinOneHot = 85,
kTfLiteBuiltinLogicalAnd = 86,
kTfLiteBuiltinLogicalNot = 87,
kTfLiteBuiltinUnpack = 88,
kTfLiteBuiltinReduceMin = 89,
kTfLiteBuiltinFloorDiv = 90,
kTfLiteBuiltinReduceAny = 91,
kTfLiteBuiltinSquare = 92,
kTfLiteBuiltinZerosLike = 93,
kTfLiteBuiltinFill = 94,
kTfLiteBuiltinFloorMod = 95,
kTfLiteBuiltinRange = 96,
kTfLiteBuiltinResizeNearestNeighbor = 97,
kTfLiteBuiltinLeakyRelu = 98,
kTfLiteBuiltinSquaredDifference = 99,
kTfLiteBuiltinMirrorPad = 100,
kTfLiteBuiltinAbs = 101,
kTfLiteBuiltinSplitV = 102,
kTfLiteBuiltinUnique = 103,
kTfLiteBuiltinCeil = 104,
kTfLiteBuiltinReverseV2 = 105,
kTfLiteBuiltinAddN = 106,
kTfLiteBuiltinGatherNd = 107,
kTfLiteBuiltinCos = 108,
kTfLiteBuiltinWhere = 109,
kTfLiteBuiltinRank = 110,
kTfLiteBuiltinElu = 111,
kTfLiteBuiltinReverseSequence = 112,
kTfLiteBuiltinMatrixDiag = 113,
kTfLiteBuiltinQuantize = 114,
kTfLiteBuiltinMatrixSetDiag = 115,
kTfLiteBuiltinRound = 116,
kTfLiteBuiltinHardSwish = 117,
kTfLiteBuiltinIf = 118,
kTfLiteBuiltinWhile = 119,
kTfLiteBuiltinNonMaxSuppressionV4 = 120,
kTfLiteBuiltinNonMaxSuppressionV5 = 121,
kTfLiteBuiltinScatterNd = 122,
kTfLiteBuiltinSelectV2 = 123,
kTfLiteBuiltinDensify = 124,
kTfLiteBuiltinSegmentSum = 125,
kTfLiteBuiltinBatchMatmul = 126,
kTfLiteBuiltinPlaceholderForGreaterOpCodes = 127,
kTfLiteBuiltinCumsum = 128,
kTfLiteBuiltinCallOnce = 129,
kTfLiteBuiltinBroadcastTo = 130,
kTfLiteBuiltinRfft2d = 131,
kTfLiteBuiltinConv3d = 132,
kTfLiteBuiltinImag = 133,
kTfLiteBuiltinReal = 134,
kTfLiteBuiltinComplexAbs = 135,
kTfLiteBuiltinHashtable = 136,
kTfLiteBuiltinHashtableFind = 137,
kTfLiteBuiltinHashtableImport = 138,
kTfLiteBuiltinHashtableSize = 139,
kTfLiteBuiltinReduceAll = 140,
kTfLiteBuiltinConv3dTranspose = 141,
kTfLiteBuiltinVarHandle = 142,
kTfLiteBuiltinReadVariable = 143,
kTfLiteBuiltinAssignVariable = 144,
kTfLiteBuiltinBroadcastArgs = 145,
kTfLiteBuiltinRandomStandardNormal = 146,
kTfLiteBuiltinBucketize = 147,
kTfLiteBuiltinRandomUniform = 148,
kTfLiteBuiltinMultinomial = 149,
kTfLiteBuiltinGelu = 150,
kTfLiteBuiltinDynamicUpdateSlice = 151,
kTfLiteBuiltinRelu0To1 = 152,
kTfLiteBuiltinUnsortedSegmentProd = 153,
kTfLiteBuiltinUnsortedSegmentMax = 154,
kTfLiteBuiltinUnsortedSegmentSum = 155,
kTfLiteBuiltinAtan2 = 156,
kTfLiteBuiltinUnsortedSegmentMin = 157,
} TfLiteBuiltinOperator;
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // TENSORFLOW_LITE_BUILTIN_OPS_H_

View File

@@ -1,525 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
// number of dimensions.
#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
// TODO(aselle): Consider using "if this then that" for testing.
// Useful placeholder to put in otherwise empty structs to avoid size warnings.
typedef struct {
char dummy;
} EmptyStructPlaceholder;
// IMPORTANT: All new members of structs must be added at the end to ensure
// backwards compatibility.
// Possible padding types (for convolutions)
typedef enum {
kTfLitePaddingUnknown = 0,
kTfLitePaddingSame,
kTfLitePaddingValid,
} TfLitePadding;
typedef enum {
kTfLiteMirrorPaddingUnknown = 0,
kTfLiteMirrorPaddingReflect,
kTfLiteMirrorPaddingSymmetric,
} TfLiteMirrorPaddingMode;
// TODO(b/130259536): We should move this out of builtin_op_data.
typedef struct {
int width;
int height;
int width_offset;
int height_offset;
} TfLitePaddingValues;
typedef struct {
TfLiteMirrorPaddingMode mode;
} TfLiteMirrorPaddingParams;
// Possible fused activation functions.
typedef enum {
kTfLiteActNone = 0,
kTfLiteActRelu,
kTfLiteActReluN1To1, // min(max(-1, x), 1)
kTfLiteActRelu6, // min(max(0, x), 6)
kTfLiteActTanh,
kTfLiteActSignBit,
kTfLiteActSigmoid,
} TfLiteFusedActivation;
typedef struct {
// Parameters for CONV_2D version 1.
TfLitePadding padding;
int stride_width;
int stride_height;
TfLiteFusedActivation activation;
// Parameters for CONV_2D version 2.
// Note: Version 2 supports dilation values not equal to 1.
int dilation_width_factor;
int dilation_height_factor;
} TfLiteConvParams;
typedef struct {
TfLitePadding padding;
int stride_width;
int stride_height;
int stride_depth;
int dilation_width_factor;
int dilation_height_factor;
int dilation_depth_factor;
TfLiteFusedActivation activation;
} TfLiteConv3DParams;
typedef TfLiteConv3DParams TfLiteConv3DTransposeParams;
typedef struct {
TfLitePadding padding;
int stride_width;
int stride_height;
int filter_width;
int filter_height;
TfLiteFusedActivation activation;
struct {
TfLitePaddingValues padding;
} computed;
} TfLitePoolParams;
typedef struct {
// Parameters for DepthwiseConv version 1 or above.
TfLitePadding padding;
int stride_width;
int stride_height;
// `depth_multiplier` is redundant. It's used by CPU kernels in
// TensorFlow 2.0 or below, but ignored in versions above.
//
// The information can be deduced from the shape of input and the shape of
// weights. Since the TFLiteConverter toolchain doesn't support partially
// specified shapes, relying on `depth_multiplier` stops us from supporting
// graphs with dynamic shape tensors.
//
// Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
// field.
int depth_multiplier;
TfLiteFusedActivation activation;
// Parameters for DepthwiseConv version 2 or above.
int dilation_width_factor;
int dilation_height_factor;
} TfLiteDepthwiseConvParams;
typedef struct {
int rank;
TfLiteFusedActivation activation;
// Parameter for SVDF version 4.
bool asymmetric_quantize_inputs;
} TfLiteSVDFParams;
typedef struct {
TfLiteFusedActivation activation;
// Parameter for RNN version 3.
bool asymmetric_quantize_inputs;
} TfLiteRNNParams;
typedef struct {
bool time_major;
TfLiteFusedActivation activation;
// Parameter for Sequence RNN version 3.
bool asymmetric_quantize_inputs;
} TfLiteSequenceRNNParams;
typedef struct {
bool time_major;
TfLiteFusedActivation activation;
bool merge_outputs;
// Parameter for Bidirectional RNN version 3.
bool asymmetric_quantize_inputs;
} TfLiteBidirectionalSequenceRNNParams;
typedef enum {
kTfLiteFullyConnectedWeightsFormatDefault = 0,
kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
} TfLiteFullyConnectedWeightsFormat;
typedef struct {
// Parameters for FullyConnected version 1 or above.
TfLiteFusedActivation activation;
// Parameters for FullyConnected version 2 or above.
TfLiteFullyConnectedWeightsFormat weights_format;
// Parameters for FullyConnected version 5 or above.
// If set to true, then the number of dimensions in the input and the output
// tensors are the same. Furthermore, all but the last dimension of the input
// and output shapes will be equal.
bool keep_num_dims;
// Parameters for FullyConnected version 7 or above.
// If set to true and the weights are quantized, then non constant inputs
// are quantized at evaluation time with asymmetric quantization.
bool asymmetric_quantize_inputs;
} TfLiteFullyConnectedParams;
typedef enum {
kTfLiteLshProjectionUnknown = 0,
kTfLiteLshProjectionSparse = 1,
kTfLiteLshProjectionDense = 2,
} TfLiteLSHProjectionType;
typedef struct {
TfLiteLSHProjectionType type;
} TfLiteLSHProjectionParams;
typedef struct {
float beta;
} TfLiteSoftmaxParams;
typedef struct {
int axis;
TfLiteFusedActivation activation;
} TfLiteConcatenationParams;
typedef struct {
TfLiteFusedActivation activation;
// Parameter added for the version 4.
bool pot_scale_int16;
} TfLiteAddParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteSpaceToBatchNDParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteBatchToSpaceNDParams;
typedef struct {
bool adj_x;
bool adj_y;
// Parameters for BatchMatMul version 4 or above.
// If set to true and the weights are quantized, then non constant inputs
// are quantized at evaluation time with asymmetric quantization.
bool asymmetric_quantize_inputs;
} TfLiteBatchMatMulParams;
typedef struct {
TfLiteFusedActivation activation;
} TfLiteMulParams;
typedef struct {
TfLiteFusedActivation activation;
// Parameter added for the version 5.
bool pot_scale_int16;
} TfLiteSubParams;
typedef struct {
TfLiteFusedActivation activation;
} TfLiteDivParams;
typedef struct {
TfLiteFusedActivation activation;
} TfLiteL2NormParams;
typedef struct {
int radius;
float bias;
float alpha;
float beta;
} TfLiteLocalResponseNormParams;
typedef enum {
kTfLiteLSTMFullKernel = 0,
kTfLiteLSTMBasicKernel
} TfLiteLSTMKernelType;
typedef struct {
// Parameters for LSTM version 1.
TfLiteFusedActivation activation;
float cell_clip;
float proj_clip;
// Parameters for LSTM version 2.
// kTfLiteLSTMBasicKernel is only supported in version 2 or above.
TfLiteLSTMKernelType kernel_type;
// Parameters for LSTM version 4.
bool asymmetric_quantize_inputs;
} TfLiteLSTMParams;
typedef struct {
// Parameters needed for the underlying LSTM.
TfLiteFusedActivation activation;
float cell_clip;
float proj_clip;
// If set to true then the first dimension is time, otherwise batch.
bool time_major;
// Parameter for unidirectional sequence RNN version 3.
bool asymmetric_quantize_inputs;
} TfLiteUnidirectionalSequenceLSTMParams;
typedef struct {
// Parameters supported by version 1:
// Parameters inherited for the LSTM kernel.
TfLiteFusedActivation activation;
float cell_clip;
float proj_clip;
// If true, store the outputs of both directions in the first output.
bool merge_outputs;
// Parameters supported by version 2:
// If set to true then the first dimension is time, otherwise batch.
bool time_major;
// Parameters supported by version 4:
// If set to true, then hybrid ops use asymmetric quantization for inputs.
bool asymmetric_quantize_inputs;
} TfLiteBidirectionalSequenceLSTMParams;
typedef struct {
bool align_corners;
// half_pixel_centers assumes pixels are of half the actual dimensions, and
// yields more accurate resizes. Corresponds to the same argument for the
// original TensorFlow op in TF2.0.
bool half_pixel_centers;
} TfLiteResizeBilinearParams;
typedef struct {
bool align_corners;
bool half_pixel_centers;
} TfLiteResizeNearestNeighborParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLitePadParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLitePadV2Params;
typedef struct {
// These fields are only used in old models for backward compatibility.
// In the current implementation, we use the 2nd input of the op as the shape,
// and these fields are unused.
int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
int num_dimensions;
} TfLiteReshapeParams;
typedef struct {
int ngram_size;
int max_skip_size;
bool include_all_ngrams;
} TfLiteSkipGramParams;
typedef struct {
int block_size;
} TfLiteSpaceToDepthParams;
typedef struct {
int block_size;
} TfLiteDepthToSpaceParams;
typedef struct {
TfLiteType in_data_type;
TfLiteType out_data_type;
} TfLiteCastParams;
typedef enum {
kTfLiteCombinerTypeSum = 0,
kTfLiteCombinerTypeMean = 1,
kTfLiteCombinerTypeSqrtn = 2,
} TfLiteCombinerType;
typedef struct {
TfLiteCombinerType combiner;
} TfLiteEmbeddingLookupSparseParams;
typedef struct {
int axis;
int batch_dims;
} TfLiteGatherParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteTransposeParams;
typedef struct {
bool keep_dims;
} TfLiteReducerParams;
typedef struct {
int num_splits;
} TfLiteSplitParams;
typedef struct {
int num_splits;
} TfLiteSplitVParams;
typedef struct {
// TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
// For now we will fix the maximum possible number of dimensions.
int squeeze_dims[8];
int num_squeeze_dims;
} TfLiteSqueezeParams;
typedef struct {
int begin_mask;
int end_mask;
int ellipsis_mask;
int new_axis_mask;
int shrink_axis_mask;
} TfLiteStridedSliceParams;
typedef struct {
TfLiteType output_type;
} TfLiteArgMaxParams;
typedef struct {
TfLiteType output_type;
} TfLiteArgMinParams;
typedef struct {
TfLitePadding padding;
int stride_width;
int stride_height;
} TfLiteTransposeConvParams;
typedef struct {
bool validate_indices;
} TfLiteSparseToDenseParams;
typedef struct {
TfLiteType out_type;
} TfLiteShapeParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteRankParams;
typedef struct {
// Parameters supported by version 1:
float min;
float max;
int num_bits;
// Parameters supported by version 2:
bool narrow_range;
} TfLiteFakeQuantParams;
typedef struct {
int values_count;
int axis;
} TfLitePackParams;
typedef struct {
int axis;
} TfLiteOneHotParams;
typedef struct {
int num;
int axis;
} TfLiteUnpackParams;
typedef struct {
float alpha;
} TfLiteLeakyReluParams;
typedef struct {
TfLiteType index_out_type;
} TfLiteUniqueParams;
typedef struct {
int seq_dim;
int batch_dim;
} TfLiteReverseSequenceParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteMatrixDiagParams;
typedef struct {
EmptyStructPlaceholder placeholder;
} TfLiteMatrixSetDiagParams;
typedef struct {
int then_subgraph_index;
int else_subgraph_index;
} TfLiteIfParams;
typedef struct {
int cond_subgraph_index;
int body_subgraph_index;
} TfLiteWhileParams;
typedef struct {
bool exclusive;
bool reverse;
} TfLiteCumsumParams;
typedef struct {
int init_subgraph_index;
} TfLiteCallOnceParams;
typedef struct {
int table_id;
TfLiteType key_dtype;
TfLiteType value_dtype;
} TfLiteHashtableParams;
typedef struct {
const char* container;
const char* shared_name;
} TfLiteVarHandleParams;
typedef struct {
int seed;
int seed2;
} TfLiteRandomParams;
typedef struct {
int num_boundaries;
// This points to the memory stored in the model (flatbuffer),
// and is not owned.
const float* boundaries;
} TfLiteBucketizeParams;
typedef struct {
bool approximate;
} TfLiteGeluParams;
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
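For orientation, the structs above are what a kernel receives through node->builtin_data once the flatbuffer has been parsed. The following is a hedged sketch, not part of the removed header; ConvPrepare is a hypothetical kernel entry point.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
// Hypothetical kernel Prepare() reading the parsed CONV_2D parameters.
TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
  const auto* params =
      reinterpret_cast<const TfLiteConvParams*>(node->builtin_data);
  if (params == nullptr) return kTfLiteError;
  const int stride_w = params->stride_width;              // CONV_2D version 1 field
  const int dilation_w = params->dilation_width_factor;   // CONV_2D version 2 field
  (void)context;
  (void)stride_w;
  (void)dilation_w;
  return kTfLiteOk;
}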

View File

@@ -1,130 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This file declares types used by the pure C inference API defined in c_api.h,
// some of which are also used in the C++ and C kernel and interpreter APIs.
#ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_
#define TENSORFLOW_LITE_C_C_API_TYPES_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
// library.
#ifdef SWIG
#define TFL_CAPI_EXPORT
#elif defined(TFL_STATIC_LIBRARY_BUILD)
#define TFL_CAPI_EXPORT
#else // not defined TFL_STATIC_LIBRARY_BUILD
#if defined(_WIN32)
#ifdef TFL_COMPILE_LIBRARY
#define TFL_CAPI_EXPORT __declspec(dllexport)
#else
#define TFL_CAPI_EXPORT __declspec(dllimport)
#endif // TFL_COMPILE_LIBRARY
#else
#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
#endif // _WIN32
#endif // SWIG
// Note that new error status values may be added in future in order to
// indicate more fine-grained internal states, therefore, applications should
// not rely on status values being members of the enum.
typedef enum TfLiteStatus {
kTfLiteOk = 0,
// Generally referring to an error in the runtime (i.e. interpreter)
kTfLiteError = 1,
// Generally referring to an error from a TfLiteDelegate itself.
kTfLiteDelegateError = 2,
// Generally referring to an error in applying a delegate due to
// incompatibility between runtime and delegate, e.g., this error is returned
// when trying to apply a TF Lite delegate onto a model graph that's already
// immutable.
kTfLiteApplicationError = 3,
// Generally referring to serialized delegate data not being found.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataNotFound = 4,
// Generally referring to data-writing issues in delegate serialization.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataWriteError = 5,
// Generally referring to data-reading issues in delegate serialization.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataReadError = 6,
// Generally referring to issues when the TF Lite model has ops that cannot be
// resolved at runtime. This could happen when the specific op is not
// registered or built with the TF Lite framework.
kTfLiteUnresolvedOps = 7,
} TfLiteStatus;
// Types supported by tensor
typedef enum {
kTfLiteNoType = 0,
kTfLiteFloat32 = 1,
kTfLiteInt32 = 2,
kTfLiteUInt8 = 3,
kTfLiteInt64 = 4,
kTfLiteString = 5,
kTfLiteBool = 6,
kTfLiteInt16 = 7,
kTfLiteComplex64 = 8,
kTfLiteInt8 = 9,
kTfLiteFloat16 = 10,
kTfLiteFloat64 = 11,
kTfLiteComplex128 = 12,
kTfLiteUInt64 = 13,
kTfLiteResource = 14,
kTfLiteVariant = 15,
kTfLiteUInt32 = 16,
kTfLiteUInt16 = 17,
} TfLiteType;
// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
// If per-layer quantization is specified this field will still be populated in
// addition to TfLiteAffineQuantization.
// Parameters for asymmetric quantization. Quantized values can be converted
// back to float using:
// real_value = scale * (quantized_value - zero_point)
typedef struct TfLiteQuantizationParams {
float scale;
int32_t zero_point;
} TfLiteQuantizationParams;
// --------------------------------------------------------------------------
// Opaque types used by c_api.h, c_api_opaque.h and common.h.
// TfLiteOpaqueContext is an opaque version of TfLiteContext;
typedef struct TfLiteOpaqueContext TfLiteOpaqueContext;
// TfLiteOpaqueNode is an opaque version of TfLiteNode;
typedef struct TfLiteOpaqueNode TfLiteOpaqueNode;
// TfLiteOpaqueTensor is an opaque version of TfLiteTensor;
typedef struct TfLiteOpaqueTensor TfLiteOpaqueTensor;
#ifdef __cplusplus
} // extern C
#endif
#endif // TENSORFLOW_LITE_C_C_API_TYPES_H_
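The TfLiteQuantizationParams comment above spells out the dequantization formula; as an illustrative sketch (not part of the removed header), it can be applied directly to an int8 buffer. DequantizeInt8 is a hypothetical helper name.
#include <cstdint>
#include <vector>
#include "tensorflow/lite/c/c_api_types.h"
// real_value = scale * (quantized_value - zero_point)
std::vector<float> DequantizeInt8(const int8_t* data, int count,
                                  const TfLiteQuantizationParams& params) {
  std::vector<float> result(count);
  for (int i = 0; i < count; ++i) {
    result[i] = params.scale * (data[i] - params.zero_point);
  }
  return result;
}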

View File

@@ -1,286 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/c/c_api_types.h"
#ifdef TF_LITE_TENSORFLOW_PROFILER
#include "tensorflow/lite/tensorflow_profiler_logger.h"
#endif
#ifndef TF_LITE_STATIC_MEMORY
#include <stdlib.h>
#include <string.h>
#endif // TF_LITE_STATIC_MEMORY
extern "C" {
size_t TfLiteIntArrayGetSizeInBytes(int size) {
static TfLiteIntArray dummy;
size_t computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size;
#if defined(_MSC_VER)
// Context for why this is needed is in http://b/189926408#comment21
computed_size -= sizeof(dummy.data[0]);
#endif
return computed_size;
}
int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
if (a == b) return 1;
if (a == nullptr || b == nullptr) return 0;
return TfLiteIntArrayEqualsArray(a, b->size, b->data);
}
int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
const int b_data[]) {
if (a == nullptr) return (b_size == 0);
if (a->size != b_size) return 0;
int i = 0;
for (; i < a->size; i++)
if (a->data[i] != b_data[i]) return 0;
return 1;
}
#ifndef TF_LITE_STATIC_MEMORY
TfLiteIntArray* TfLiteIntArrayCreate(int size) {
size_t alloc_size = TfLiteIntArrayGetSizeInBytes(size);
if (alloc_size <= 0) return nullptr;
TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size);
if (!ret) return ret;
ret->size = size;
return ret;
}
TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
if (!src) return nullptr;
TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
if (ret) {
memcpy(ret->data, src->data, src->size * sizeof(int));
}
return ret;
}
void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); }
#endif // TF_LITE_STATIC_MEMORY
int TfLiteFloatArrayGetSizeInBytes(int size) {
static TfLiteFloatArray dummy;
int computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size;
#if defined(_MSC_VER)
// Context for why this is needed is in http://b/189926408#comment21
computed_size -= sizeof(dummy.data[0]);
#endif
return computed_size;
}
#ifndef TF_LITE_STATIC_MEMORY
TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
TfLiteFloatArray* ret =
(TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size));
ret->size = size;
return ret;
}
void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }
void TfLiteTensorDataFree(TfLiteTensor* t) {
if (t->allocation_type == kTfLiteDynamic ||
t->allocation_type == kTfLitePersistentRo) {
if (t->data.raw) {
#ifdef TF_LITE_TENSORFLOW_PROFILER
tflite::OnTfLiteTensorDealloc(t);
#endif
free(t->data.raw);
}
}
t->data.raw = nullptr;
}
void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
if (quantization->type == kTfLiteAffineQuantization) {
TfLiteAffineQuantization* q_params =
(TfLiteAffineQuantization*)(quantization->params);
if (q_params->scale) {
TfLiteFloatArrayFree(q_params->scale);
q_params->scale = nullptr;
}
if (q_params->zero_point) {
TfLiteIntArrayFree(q_params->zero_point);
q_params->zero_point = nullptr;
}
free(q_params);
}
quantization->params = nullptr;
quantization->type = kTfLiteNoQuantization;
}
void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
if (sparsity == nullptr) {
return;
}
if (sparsity->traversal_order) {
TfLiteIntArrayFree(sparsity->traversal_order);
sparsity->traversal_order = nullptr;
}
if (sparsity->block_map) {
TfLiteIntArrayFree(sparsity->block_map);
sparsity->block_map = nullptr;
}
if (sparsity->dim_metadata) {
int i = 0;
for (; i < sparsity->dim_metadata_size; i++) {
TfLiteDimensionMetadata metadata = sparsity->dim_metadata[i];
if (metadata.format == kTfLiteDimSparseCSR) {
TfLiteIntArrayFree(metadata.array_segments);
metadata.array_segments = nullptr;
TfLiteIntArrayFree(metadata.array_indices);
metadata.array_indices = nullptr;
}
}
free(sparsity->dim_metadata);
sparsity->dim_metadata = nullptr;
}
free(sparsity);
}
void TfLiteTensorFree(TfLiteTensor* t) {
TfLiteTensorDataFree(t);
if (t->dims) TfLiteIntArrayFree(t->dims);
t->dims = nullptr;
if (t->dims_signature) {
TfLiteIntArrayFree((TfLiteIntArray*)t->dims_signature);
}
t->dims_signature = nullptr;
TfLiteQuantizationFree(&t->quantization);
TfLiteSparsityFree(t->sparsity);
t->sparsity = nullptr;
}
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
TfLiteQuantizationParams quantization, char* buffer,
size_t size, TfLiteAllocationType allocation_type,
const void* allocation, bool is_variable,
TfLiteTensor* tensor) {
TfLiteTensorFree(tensor);
tensor->type = type;
tensor->name = name;
tensor->dims = dims;
tensor->params = quantization;
tensor->data.raw = buffer;
tensor->bytes = size;
tensor->allocation_type = allocation_type;
tensor->allocation = allocation;
tensor->is_variable = is_variable;
tensor->quantization.type = kTfLiteNoQuantization;
tensor->quantization.params = nullptr;
}
TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) {
if (!src || !dst) return kTfLiteOk;
if (src->bytes != dst->bytes) return kTfLiteError;
if (src == dst) return kTfLiteOk;
dst->type = src->type;
if (dst->dims) TfLiteIntArrayFree(dst->dims);
dst->dims = TfLiteIntArrayCopy(src->dims);
memcpy(dst->data.raw, src->data.raw, src->bytes);
dst->buffer_handle = src->buffer_handle;
dst->data_is_stale = src->data_is_stale;
dst->delegate = src->delegate;
return kTfLiteOk;
}
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
if (tensor->allocation_type != kTfLiteDynamic &&
tensor->allocation_type != kTfLitePersistentRo) {
return;
}
// TODO(b/145340303): Tensor data should be aligned.
if (!tensor->data.raw) {
tensor->data.raw = (char*)malloc(num_bytes);
#ifdef TF_LITE_TENSORFLOW_PROFILER
tflite::OnTfLiteTensorAlloc(tensor, num_bytes);
#endif
} else if (num_bytes > tensor->bytes) {
#ifdef TF_LITE_TENSORFLOW_PROFILER
tflite::OnTfLiteTensorDealloc(tensor);
#endif
tensor->data.raw = (char*)realloc(tensor->data.raw, num_bytes);
#ifdef TF_LITE_TENSORFLOW_PROFILER
tflite::OnTfLiteTensorAlloc(tensor, num_bytes);
#endif
}
tensor->bytes = num_bytes;
}
#endif // TF_LITE_STATIC_MEMORY
const char* TfLiteTypeGetName(TfLiteType type) {
switch (type) {
case kTfLiteNoType:
return "NOTYPE";
case kTfLiteFloat32:
return "FLOAT32";
case kTfLiteUInt16:
return "UINT16";
case kTfLiteInt16:
return "INT16";
case kTfLiteInt32:
return "INT32";
case kTfLiteUInt32:
return "UINT32";
case kTfLiteUInt8:
return "UINT8";
case kTfLiteInt8:
return "INT8";
case kTfLiteInt64:
return "INT64";
case kTfLiteUInt64:
return "UINT64";
case kTfLiteBool:
return "BOOL";
case kTfLiteComplex64:
return "COMPLEX64";
case kTfLiteComplex128:
return "COMPLEX128";
case kTfLiteString:
return "STRING";
case kTfLiteFloat16:
return "FLOAT16";
case kTfLiteFloat64:
return "FLOAT64";
case kTfLiteResource:
return "RESOURCE";
case kTfLiteVariant:
return "VARIANT";
}
return "Unknown type";
}
TfLiteDelegate TfLiteDelegateCreate() { return TfLiteDelegate{}; }
} // extern "C"
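A brief usage sketch of the array helpers defined above (not part of the removed file); note that the heap-based create/free path is compiled out under TF_LITE_STATIC_MEMORY.
#include "tensorflow/lite/c/common.h"
void IntArrayExample() {
  TfLiteIntArray* dims = TfLiteIntArrayCreate(2);  // heap path only
  if (dims == nullptr) return;
  dims->data[0] = 1;
  dims->data[1] = 4;
  const int expected[] = {1, 4};
  const bool matches = TfLiteIntArrayEqualsArray(dims, 2, expected) == 1;
  (void)matches;
  TfLiteIntArrayFree(dims);
}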

File diff suppressed because it is too large

View File

@@ -1,51 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This provides a few C++ helpers that are useful for manipulating C structures
// in C++.
#ifndef TENSORFLOW_LITE_CONTEXT_UTIL_H_
#define TENSORFLOW_LITE_CONTEXT_UTIL_H_
#include <stddef.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Provide a range iterable wrapper for TfLiteIntArray* (C lists) that TfLite
// C api uses. Can't use the google array_view, since we can't depend on even
// absl for embedded device reasons.
class TfLiteIntArrayView {
public:
// Construct a view of a TfLiteIntArray*. Note, `int_array` should be non-null
// and this view does not take ownership of it.
explicit TfLiteIntArrayView(const TfLiteIntArray* int_array)
: int_array_(int_array) {}
TfLiteIntArrayView(const TfLiteIntArrayView&) = default;
TfLiteIntArrayView& operator=(const TfLiteIntArrayView& rhs) = default;
typedef const int* const_iterator;
const_iterator begin() const { return int_array_->data; }
const_iterator end() const { return &int_array_->data[int_array_->size]; }
size_t size() const { return end() - begin(); }
int operator[](size_t pos) const { return int_array_->data[pos]; }
private:
const TfLiteIntArray* int_array_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_CONTEXT_UTIL_H_
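A short usage sketch of the view (not part of the removed header): NumElements is a hypothetical helper that ranges over a tensor's dims.
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/context_util.h"
// Count the elements of a tensor by iterating its dims with the view.
int64_t NumElements(const TfLiteTensor* tensor) {
  int64_t count = 1;
  for (int d : tflite::TfLiteIntArrayView(tensor->dims)) {
    count *= d;
  }
  return count;
}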

View File

@@ -1,38 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/core/api/error_reporter.h"
#include <cstdarg>
namespace tflite {
int ErrorReporter::Report(const char* format, ...) {
va_list args;
va_start(args, format);
int code = Report(format, args);
va_end(args);
return code;
}
// TODO(aselle): Make the name of ReportError on context the same, so
// we can use the ensure functions w/o a context and w/ a reporter.
int ErrorReporter::ReportError(void*, const char* format, ...) {
va_list args;
va_start(args, format);
int code = Report(format, args);
va_end(args);
return code;
}
} // namespace tflite

View File

@@ -1,59 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
#include <cstdarg>
namespace tflite {
/// A functor that reports error to supporting system. Invoked similar to
/// printf.
///
/// Usage:
/// ErrorReporter foo;
/// foo.Report("test %d", 5);
/// or
/// va_list args;
/// foo.Report("test %d", args); // where args is va_list
///
/// Subclass ErrorReporter to provide another reporting destination.
/// For example, if you have a GUI program, you might redirect to a buffer
/// that drives a GUI error log box.
class ErrorReporter {
public:
virtual ~ErrorReporter() {}
virtual int Report(const char* format, va_list args) = 0;
int Report(const char* format, ...);
int ReportError(void*, const char* format, ...);
};
} // namespace tflite
// You should not make bare calls to the error reporter; instead, use the
// TF_LITE_REPORT_ERROR macro, since this allows message strings to be
// stripped when the binary size has to be optimized. If you are looking to
// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and
// every call will be stubbed out, taking no memory.
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_REPORT_ERROR(reporter, ...) \
do { \
static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
} while (false)
#else // TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_REPORT_ERROR(reporter, ...)
#endif // TF_LITE_STRIP_ERROR_STRINGS
#endif // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
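As the header comment suggests, subclassing ErrorReporter redirects reports to another destination. A minimal sketch, not part of the removed header; StderrReporter is a hypothetical subclass name.
#include <cstdarg>
#include <cstdio>
#include "tensorflow/lite/core/api/error_reporter.h"
// Forward every report to stderr.
class StderrReporter : public tflite::ErrorReporter {
 public:
  int Report(const char* format, va_list args) override {
    return std::vfprintf(stderr, format, args);
  }
};
// Usage:
//   StderrReporter reporter;
//   TF_LITE_REPORT_ERROR(&reporter, "tensor %d has an unexpected type", 3);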

View File

@@ -1,408 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
#define TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
// These functions transform codes and data structures that are defined in the
// flatbuffer serialization format into in-memory values that are used by the
// runtime API and interpreter.
#include <cstddef>
#include <new>
#include <type_traits>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Interface class for builtin data allocations.
class BuiltinDataAllocator {
public:
virtual void* Allocate(size_t size, size_t alignment_hint) = 0;
virtual void Deallocate(void* data) = 0;
// Allocate a structure, but make sure it is a POD structure that doesn't
// require constructors to run. The reason we do this is that Interpreter's C
// extension part will take ownership so destructors will not be run during
// deallocation.
template <typename T>
T* AllocatePOD() {
// TODO(b/154346074): Change this to is_trivially_destructible when all
// platform targets support that properly.
static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
return new (allocated_memory) T();
}
virtual ~BuiltinDataAllocator() {}
};
// Parse the appropriate data out of the op.
//
// This handles builtin data explicitly as there are flatbuffer schemas.
// If it returns kTfLiteOk, it passes the data out with `builtin_data`. The
// calling function has to pass in an allocator object, and this allocator
// will be called to reserve space for the output data. If the calling
// function's allocator reserves memory on the heap, then it's the calling
// function's responsibility to free it.
// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
// Converts the tensor data type used in the flat buffer to the representation
// used by the runtime.
TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
ErrorReporter* error_reporter);
TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseAddN(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseAssignVariable(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseBatchMatMul(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseBatchToSpaceNd(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseBroadcastArgs(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseBroadcastTo(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseCallOnce(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseCast(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseConcatenation(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseCumsum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseDepthToSpace(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseDiv(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseElu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseExp(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseExpandDims(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseFill(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseFloorDiv(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseFloorMod(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseFullyConnected(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseGather(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseGatherNd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseGreaterEqual(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseL2Normalization(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLeakyRelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLogSoftmax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseLSTM(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseMirrorPad(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePow(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseReadVariable(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseResizeBilinear(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSlice(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSpaceToBatchNd(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseSpaceToDepth(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSplitV(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSqueeze(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSquaredDifference(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseStridedSlice(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseTranspose(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseTransposeConv(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseUnidirectionalSequenceLSTM(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseVarHandle(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseWhile(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseZerosLike(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
} // namespace tflite
#endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
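To illustrate the allocator contract that ParseOpData relies on, here is a hedged sketch of a heap-backed BuiltinDataAllocator. HeapBuiltinDataAllocator is a hypothetical name, though TF Lite ships a similar malloc-based allocator internally.
#include <cstdlib>
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
// Heap-backed allocator handed to ParseOpData; the caller keeps ownership of
// the returned builtin data and releases it through the same allocator.
class HeapBuiltinDataAllocator : public tflite::BuiltinDataAllocator {
 public:
  void* Allocate(size_t size, size_t alignment_hint) override {
    (void)alignment_hint;  // this sketch ignores the alignment hint
    return std::malloc(size);
  }
  void Deallocate(void* data) override { std::free(data); }
};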

View File

@@ -1,68 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/core/api/op_resolver.h"
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_utils.h"
namespace tflite {
TfLiteStatus GetRegistrationFromOpCode(
const OperatorCode* opcode, const OpResolver& op_resolver,
ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
TfLiteStatus status = kTfLiteOk;
*registration = nullptr;
auto builtin_code = GetBuiltinCode(opcode);
int version = opcode->version();
if (builtin_code > BuiltinOperator_MAX) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Op builtin_code out of range: %d. Are you using old TFLite binary "
"with newer model?",
builtin_code);
status = kTfLiteError;
} else if (builtin_code != BuiltinOperator_CUSTOM) {
*registration = op_resolver.FindOp(builtin_code, version);
if (*registration == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Didn't find op for builtin opcode '%s' version '%d'. "
"An older version of this builtin might be supported. "
"Are you using an old TFLite binary with a newer model?\n",
EnumNameBuiltinOperator(builtin_code), version);
status = kTfLiteError;
}
} else if (!opcode->custom_code()) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Operator with CUSTOM builtin_code has no custom_code.\n");
status = kTfLiteError;
} else {
const char* name = opcode->custom_code()->c_str();
*registration = op_resolver.FindOp(name, version);
if (*registration == nullptr) {
// Do not report an error for an unresolved custom op; we do the final check
// while preparing ops.
status = kTfLiteError;
}
}
return status;
}
} // namespace tflite

View File

@@ -1,140 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
#include <functional>
#include <memory>
#include <vector>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
// Opaque type similar to TfLiteDelegate / TfLiteOpaqueDelegate.
// This is used for cases (e.g. when using "TF Lite with Google Play Services")
// where the TF Lite runtime might be built using a newer (or older)
// version of the TF Lite sources than the app, and hence might have a
// different definition of the TfLiteDelegate type. TF Lite APIs use
// TfLiteOpaqueDelegate rather than TfLiteDelegate when they want to
// refer to a delegate defined with that potentially different version
// of the TfLiteDelegate type.
struct TfLiteOpaqueDelegateStruct;
namespace tflite {
/// Abstract interface that returns TfLiteRegistrations given op codes or custom
/// op names. This is the mechanism by which ops referenced in the flatbuffer
/// model are mapped to executable function pointers (TfLiteRegistrations).
class OpResolver {
public:
/// Finds the op registration for a builtin operator by enum code.
virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
int version) const = 0;
/// Finds the op registration of a custom operator by op name.
virtual const TfLiteRegistration* FindOp(const char* op,
int version) const = 0;
// Represents a sequence of delegates.
using TfLiteDelegatePtrVector =
std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
// Returns optional delegates for resolving and handling ops in the flatbuffer
// model. This may be used in addition to the standard TfLiteRegistration
// lookup for graph resolution.
// WARNING: This API is deprecated, GetDelegateCreators is preferred.
virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
return {};
}
// Represents a function that creates a TfLite delegate instance.
using TfLiteDelegateCreator =
std::function<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
int /*num_threads*/)>;
// Represents a sequence of delegate creator functions.
using TfLiteDelegateCreators = std::vector<TfLiteDelegateCreator>;
// Returns a vector of delegate creators to create optional delegates for
// resolving and handling ops in the flatbuffer model. This may be used in
// addition to the standard TfLiteRegistration lookup for graph resolution.
//
// Note that this method is not used (will not be called) if you are using
// TF Lite in Google Play Services; the GetOpaqueDelegateCreators method
// (see below) is used for that case.
virtual TfLiteDelegateCreators GetDelegateCreators() const { return {}; }
// TODO(b/202712825): it would be nice if we could avoid the need for separate
// "opaque" types & methods for use only with TF Lite in Google Play Services.
// Represents an opaque delegate instance.
// WARNING: Experimental interface, subject to change.
using TfLiteOpaqueDelegatePtr =
std::unique_ptr<TfLiteOpaqueDelegateStruct,
void (*)(TfLiteOpaqueDelegateStruct*)>;
// Represents a function that creates an opaque delegate instance.
// WARNING: Experimental interface, subject to change.
using TfLiteOpaqueDelegateCreator =
std::function<TfLiteOpaqueDelegatePtr(int /*num_threads*/)>;
// Represents a sequence of opaque delegate creator functions.
// WARNING: Experimental interface, subject to change.
using TfLiteOpaqueDelegateCreators = std::vector<TfLiteOpaqueDelegateCreator>;
// Returns a vector of opaque delegate creators to create optional opaque
// delegates for resolving and handling ops in the flatbuffer model. This may
// be used in addition to the standard TfLiteRegistration lookup for graph
// resolution.
//
// Note that this method will be called only if you are using TF Lite in
// Google Play Services; if you are using regular TF Lite, GetDelegateCreators
// (see above) is used instead.
//
// WARNING: Experimental interface, subject to change.
virtual TfLiteOpaqueDelegateCreators GetOpaqueDelegateCreators() const {
return {};
}
virtual ~OpResolver() {}
private:
/// Returns true if this OpResolver may contain any "user defined" ops.
/// By "user defined" ops, we mean any op definitions other than those
/// contained in tflite::ops::builtin::BuiltinOpResolver.
///
/// If this method returns true, it doesn't necessarily mean that the
/// OpResolver contains a user-defined op, just that the absence of
/// user-defined ops can't be guaranteed.
///
/// Note that "user-defined" ops are not the same as "custom" ops;
/// BuiltinOpResolver may support certain "custom" ops, in addition to
/// "builtin" ops, and may not support all of the "builtin" op enum values.
virtual bool MayContainUserDefinedOps() const { return true; }
friend class OpResolverInternal;
};
// Handles the logic for converting between an OperatorCode structure extracted
// from a flatbuffer and information about a registered operator
// implementation.
TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
const OpResolver& op_resolver,
ErrorReporter* error_reporter,
const TfLiteRegistration** registration);
} // namespace tflite
#endif // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
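For orientation, a minimal sketch of how the two pure-virtual FindOp methods above are typically satisfied; the class name and single-entry lookup are illustrative only and not part of the removed library.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/op_resolver.h"
// Hypothetical resolver that knows exactly one builtin op. GetRegistrationFromOpCode
// (declared above) would call FindOp on it while walking a model's operator codes.
class SingleOpResolver : public tflite::OpResolver {
 public:
  SingleOpResolver(tflite::BuiltinOperator op, const TfLiteRegistration* reg)
      : op_(op), registration_(reg) {}
  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
                                   int version) const override {
    (void)version;  // A real resolver would also check the version.
    return op == op_ ? registration_ : nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op, int version) const override {
    (void)op;
    (void)version;
    return nullptr;  // No custom ops in this sketch.
  }
 private:
  tflite::BuiltinOperator op_;
  const TfLiteRegistration* registration_;
};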

View File

@@ -1,50 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/core/api/tensor_utils.h"
#include <string.h>
#include "tensorflow/lite/c/common.h"
namespace tflite {
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
if (!tensor->is_variable) {
return kTfLiteOk;
}
// TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
// to the value of the buffer.
int value = 0;
if (tensor->type == kTfLiteInt8) {
value = tensor->params.zero_point;
}
// TODO(b/139446230): Provide a platform header to better handle these
// specific scenarios.
#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
defined(__i386) || defined(__x86__) || defined(__X86__) || \
defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
memset(tensor->data.raw, value, tensor->bytes);
#else
char* raw_ptr = tensor->data.raw;
for (size_t i = 0; i < tensor->bytes; ++i) {
*raw_ptr = value;
raw_ptr++;
}
#endif
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,28 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Resets a variable tensor to the default value.
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);
} // namespace tflite
#endif // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
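A small, hand-built example of ResetVariableTensor as implemented above, assuming the tensor_utils.h and c/common.h headers from this commit; the four-byte buffer and values are illustrative.
#include <cassert>
#include <cstring>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
int main() {
  int8_t storage[4] = {7, 7, 7, 7};
  TfLiteTensor tensor;
  std::memset(&tensor, 0, sizeof(tensor));
  tensor.type = kTfLiteInt8;
  tensor.is_variable = true;  // Non-variable tensors are left untouched.
  tensor.bytes = sizeof(storage);
  tensor.data.raw = reinterpret_cast<char*>(storage);
  tensor.params.zero_point = 0;
  assert(tflite::ResetVariableTensor(&tensor) == kTfLiteOk);
  assert(storage[0] == 0);  // Int8 variable tensors reset to their zero_point.
  return 0;
}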

View File

@@ -1,102 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
#ifdef __cplusplus
#include <cstdint>
extern "C" {
#endif
static inline int CountLeadingZeros32Slow(uint64_t n) {
int zeroes = 28;
if (n >> 16) zeroes -= 16, n >>= 16;
if (n >> 8) zeroes -= 8, n >>= 8;
if (n >> 4) zeroes -= 4, n >>= 4;
return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
}
static inline int CountLeadingZeros32(uint32_t n) {
#if defined(_MSC_VER)
unsigned long result = 0; // NOLINT(runtime/int)
if (_BitScanReverse(&result, n)) {
return 31 - result;
}
return 32;
#elif defined(__GNUC__)
// Handle 0 as a special case because __builtin_clz(0) is undefined.
if (n == 0) {
return 32;
}
return __builtin_clz(n);
#else
return CountLeadingZeros32Slow(n);
#endif
}
static inline int MostSignificantBit32(uint32_t n) {
return 32 - CountLeadingZeros32(n);
}
static inline int CountLeadingZeros64Slow(uint64_t n) {
int zeroes = 60;
if (n >> 32) zeroes -= 32, n >>= 32;
if (n >> 16) zeroes -= 16, n >>= 16;
if (n >> 8) zeroes -= 8, n >>= 8;
if (n >> 4) zeroes -= 4, n >>= 4;
return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes;
}
static inline int CountLeadingZeros64(uint64_t n) {
#if defined(_MSC_VER) && defined(_M_X64)
// MSVC does not have __builtin_clzll. Use _BitScanReverse64.
unsigned long result = 0; // NOLINT(runtime/int)
if (_BitScanReverse64(&result, n)) {
return 63 - result;
}
return 64;
#elif defined(_MSC_VER)
// MSVC does not have __builtin_clzll. Compose two calls to _BitScanReverse
unsigned long result = 0; // NOLINT(runtime/int)
if ((n >> 32) && _BitScanReverse(&result, n >> 32)) {
return 31 - result;
}
if (_BitScanReverse(&result, n)) {
return 63 - result;
}
return 64;
#elif defined(__GNUC__)
// Handle 0 as a special case because __builtin_clzll(0) is undefined.
if (n == 0) {
return 64;
}
return __builtin_clzll(n);
#else
return CountLeadingZeros64Slow(n);
#endif
}
static inline int MostSignificantBit64(uint64_t n) {
return 64 - CountLeadingZeros64(n);
}
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_
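For reference, a self-contained sanity check of the helpers above, assuming only the bits.h header shown; the expected values are worked out by hand from the definitions.
#include <cassert>
#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
int main() {
  // MostSignificantBit32 returns the 1-based position of the highest set bit.
  assert(MostSignificantBit32(1u) == 1);        // 0b1
  assert(MostSignificantBit32(0x80u) == 8);     // 0b1000'0000
  assert(MostSignificantBit64(1ull << 40) == 41);
  // CountLeadingZeros32(0) is defined to return 32 by the wrappers above.
  assert(CountLeadingZeros32(0u) == 32);
  return 0;
}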

View File

@@ -1,52 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
#include <string.h>
#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h"
void FftCompute(struct FftState* state, const int16_t* input,
int input_scale_shift) {
const size_t input_size = state->input_size;
const size_t fft_size = state->fft_size;
int16_t* fft_input = state->input;
// First, scale the input by the given shift.
size_t i;
for (i = 0; i < input_size; ++i) {
fft_input[i] = static_cast<int16_t>(static_cast<uint16_t>(input[i])
<< input_scale_shift);
}
// Zero out whatever else remains in the top part of the input.
for (; i < fft_size; ++i) {
fft_input[i] = 0;
}
// Apply the FFT.
kissfft_fixed16::kiss_fftr(
reinterpret_cast<kissfft_fixed16::kiss_fftr_cfg>(state->scratch),
state->input,
reinterpret_cast<kissfft_fixed16::kiss_fft_cpx*>(state->output));
}
void FftInit(struct FftState* state) {
// All the initialization is done in FftPopulateState()
}
void FftReset(struct FftState* state) {
memset(state->input, 0, state->fft_size * sizeof(*state->input));
memset(state->output, 0, (state->fft_size / 2 + 1) * sizeof(*state->output));
}

View File

@@ -1,50 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_
#include <stdint.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
struct complex_int16_t {
int16_t real;
int16_t imag;
};
struct FftState {
int16_t* input;
struct complex_int16_t* output;
size_t fft_size;
size_t input_size;
void* scratch;
size_t scratch_size;
};
void FftCompute(struct FftState* state, const int16_t* input,
int input_scale_shift);
void FftInit(struct FftState* state);
void FftReset(struct FftState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_

View File

@@ -1,70 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
#include <stdio.h>
#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h"
int FftPopulateState(struct FftState* state, size_t input_size) {
state->input_size = input_size;
state->fft_size = 1;
while (state->fft_size < state->input_size) {
state->fft_size <<= 1;
}
state->input = reinterpret_cast<int16_t*>(
malloc(state->fft_size * sizeof(*state->input)));
if (state->input == nullptr) {
fprintf(stderr, "Failed to alloc fft input buffer\n");
return 0;
}
state->output = reinterpret_cast<complex_int16_t*>(
malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2));
if (state->output == nullptr) {
fprintf(stderr, "Failed to alloc fft output buffer\n");
return 0;
}
// Ask kissfft how much memory it wants.
size_t scratch_size = 0;
kissfft_fixed16::kiss_fftr_cfg kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(
state->fft_size, 0, nullptr, &scratch_size);
if (kfft_cfg != nullptr) {
fprintf(stderr, "Kiss memory sizing failed.\n");
return 0;
}
state->scratch = malloc(scratch_size);
if (state->scratch == nullptr) {
fprintf(stderr, "Failed to alloc fft scratch buffer\n");
return 0;
}
state->scratch_size = scratch_size;
// Let kissfft configure the scratch space we just allocated
kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(state->fft_size, 0,
state->scratch, &scratch_size);
if (kfft_cfg != state->scratch) {
fprintf(stderr, "Kiss memory preallocation strategy failed.\n");
return 0;
}
return 1;
}
void FftFreeStateContents(struct FftState* state) {
free(state->input);
free(state->output);
free(state->scratch);
}
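A sketch of the intended call sequence for the FFT helpers above, assuming the fft.h and fft_util.h headers shown in this commit; the function name and the 512-sample input size are arbitrary.
#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
int RunOnce(const int16_t* samples /* at least 512 values */) {
  struct FftState fft;
  if (!FftPopulateState(&fft, /*input_size=*/512)) {
    return 0;  // Allocation or kissfft sizing failed.
  }
  FftInit(&fft);
  // No scaling shift applied here; FrontendProcessSamples derives it from the
  // window's maximum absolute output value.
  FftCompute(&fft, samples, /*input_scale_shift=*/0);
  // fft.output now holds fft.fft_size / 2 + 1 complex_int16_t bins.
  FftFreeStateContents(&fft);
  return 1;
}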

View File

@@ -1,34 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_
#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
#ifdef __cplusplus
extern "C" {
#endif
// Prepares an FFT for the given input size.
int FftPopulateState(struct FftState* state, size_t input_size);
// Frees any allocated buffers.
void FftFreeStateContents(struct FftState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_

View File

@@ -1,134 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
#include <string.h>
#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
struct complex_int16_t* fft_output,
int32_t* energy) {
const int end_index = state->end_index;
int i;
energy += state->start_index;
fft_output += state->start_index;
for (i = state->start_index; i < end_index; ++i) {
const int32_t real = fft_output->real;
const int32_t imag = fft_output->imag;
fft_output++;
const uint32_t mag_squared = (real * real) + (imag * imag);
*energy++ = mag_squared;
}
}
void FilterbankAccumulateChannels(struct FilterbankState* state,
const int32_t* energy) {
uint64_t* work = state->work;
uint64_t weight_accumulator = 0;
uint64_t unweight_accumulator = 0;
const int16_t* channel_frequency_starts = state->channel_frequency_starts;
const int16_t* channel_weight_starts = state->channel_weight_starts;
const int16_t* channel_widths = state->channel_widths;
int num_channels_plus_1 = state->num_channels + 1;
int i;
for (i = 0; i < num_channels_plus_1; ++i) {
const int32_t* magnitudes = energy + *channel_frequency_starts++;
const int16_t* weights = state->weights + *channel_weight_starts;
const int16_t* unweights = state->unweights + *channel_weight_starts++;
const int width = *channel_widths++;
int j;
for (j = 0; j < width; ++j) {
weight_accumulator += *weights++ * ((uint64_t)*magnitudes);
unweight_accumulator += *unweights++ * ((uint64_t)*magnitudes);
++magnitudes;
}
*work++ = weight_accumulator;
weight_accumulator = unweight_accumulator;
unweight_accumulator = 0;
}
}
static uint16_t Sqrt32(uint32_t num) {
if (num == 0) {
return 0;
}
uint32_t res = 0;
int max_bit_number = 32 - MostSignificantBit32(num);
max_bit_number |= 1;
uint32_t bit = 1U << (31 - max_bit_number);
int iterations = (31 - max_bit_number) / 2 + 1;
while (iterations--) {
if (num >= res + bit) {
num -= res + bit;
res = (res >> 1U) + bit;
} else {
res >>= 1U;
}
bit >>= 2U;
}
// Do rounding - if we have the bits.
if (num > res && res != 0xFFFF) {
++res;
}
return res;
}
static uint32_t Sqrt64(uint64_t num) {
// Take a shortcut and just use 32 bit operations if the upper word is all
// clear. This will cause a slight off by one issue for numbers close to 2^32,
// but it probably isn't going to matter (and gives us a big performance win).
if ((num >> 32) == 0) {
return Sqrt32((uint32_t)num);
}
uint64_t res = 0;
int max_bit_number = 64 - MostSignificantBit64(num);
max_bit_number |= 1;
uint64_t bit = 1ULL << (63 - max_bit_number);
int iterations = (63 - max_bit_number) / 2 + 1;
while (iterations--) {
if (num >= res + bit) {
num -= res + bit;
res = (res >> 1U) + bit;
} else {
res >>= 1U;
}
bit >>= 2U;
}
// Do rounding - if we have the bits.
if (num > res && res != 0xFFFFFFFFLL) {
++res;
}
return res;
}
uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) {
const int num_channels = state->num_channels;
const uint64_t* work = state->work + 1;
// Reuse the work buffer since we're fine clobbering it at this point to hold
// the output.
uint32_t* output = (uint32_t*)state->work;
int i;
for (i = 0; i < num_channels; ++i) {
*output++ = Sqrt64(*work++) >> scale_down_shift;
}
return (uint32_t*)state->work;
}
void FilterbankReset(struct FilterbankState* state) {
memset(state->work, 0, (state->num_channels + 1) * sizeof(*state->work));
}
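Sqrt32 and Sqrt64 above are the classic bit-by-bit integer square root. Below is a plain standalone restatement of the same method (without the MSB-based starting-bit optimization or the rounding step used above), with a few hand-checked values; it is not the library routine itself.
#include <cassert>
#include <cstdint>
static uint32_t IntSqrt(uint64_t num) {
  uint64_t res = 0;
  uint64_t bit = 1ull << 62;  // Highest power of four representable in 64 bits.
  while (bit > num) bit >>= 2;
  while (bit != 0) {
    if (num >= res + bit) {
      num -= res + bit;
      res = (res >> 1) + bit;
    } else {
      res >>= 1;
    }
    bit >>= 2;
  }
  return static_cast<uint32_t>(res);
}
int main() {
  assert(IntSqrt(0) == 0);
  assert(IntSqrt(144) == 12);
  assert(IntSqrt(1ull << 32) == 65536);        // sqrt(2^32) = 2^16
  assert(IntSqrt((1ull << 32) - 1) == 65535);  // floor of sqrt(2^32 - 1)
  return 0;
}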

View File

@@ -1,63 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_
#include <stdint.h>
#include <stdlib.h>
#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
#define kFilterbankBits 12
#ifdef __cplusplus
extern "C" {
#endif
struct FilterbankState {
int num_channels;
int start_index;
int end_index;
int16_t* channel_frequency_starts;
int16_t* channel_weight_starts;
int16_t* channel_widths;
int16_t* weights;
int16_t* unweights;
uint64_t* work;
};
// Converts the relevant complex values of an FFT output into energy (the
// square magnitude).
void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state,
struct complex_int16_t* fft_output,
int32_t* energy);
// Computes the mel-scale filterbank on the given energy array. Output is cached
// internally - to fetch it, you need to call FilterbankSqrt.
void FilterbankAccumulateChannels(struct FilterbankState* state,
const int32_t* energy);
// Applies an integer square root to the 64 bit intermediate values of the
// filterbank, and returns a pointer to them. Memory will be invalidated the
// next time FilterbankAccumulateChannels is called.
uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift);
void FilterbankReset(struct FilterbankState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_

View File

@@ -1,220 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
#define kFilterbankIndexAlignment 4
#define kFilterbankChannelBlockSize 4
void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config) {
config->num_channels = 32;
config->lower_band_limit = 125.0f;
config->upper_band_limit = 7500.0f;
config->output_scale_shift = 7;
}
static float FreqToMel(float freq) { return 1127.0 * log1p(freq / 700.0); }
static void CalculateCenterFrequencies(const int num_channels,
const float lower_frequency_limit,
const float upper_frequency_limit,
float* center_frequencies) {
assert(lower_frequency_limit >= 0.0f);
assert(upper_frequency_limit > lower_frequency_limit);
const float mel_low = FreqToMel(lower_frequency_limit);
const float mel_hi = FreqToMel(upper_frequency_limit);
const float mel_span = mel_hi - mel_low;
const float mel_spacing = mel_span / ((float)num_channels);
int i;
for (i = 0; i < num_channels; ++i) {
center_frequencies[i] = mel_low + (mel_spacing * (i + 1));
}
}
static void QuantizeFilterbankWeights(const float float_weight, int16_t* weight,
int16_t* unweight) {
*weight = floor(float_weight * (1 << kFilterbankBits) + 0.5);
*unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5);
}
int FilterbankPopulateState(const struct FilterbankConfig* config,
struct FilterbankState* state, int sample_rate,
int spectrum_size) {
state->num_channels = config->num_channels;
const int num_channels_plus_1 = config->num_channels + 1;
// How should we align things to index counts given the byte alignment?
const int index_alignment =
(kFilterbankIndexAlignment < sizeof(int16_t)
? 1
: kFilterbankIndexAlignment / sizeof(int16_t));
state->channel_frequency_starts =
malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts));
state->channel_weight_starts =
malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts));
state->channel_widths =
malloc(num_channels_plus_1 * sizeof(*state->channel_widths));
state->work = malloc(num_channels_plus_1 * sizeof(*state->work));
float* center_mel_freqs =
malloc(num_channels_plus_1 * sizeof(*center_mel_freqs));
int16_t* actual_channel_starts =
malloc(num_channels_plus_1 * sizeof(*actual_channel_starts));
int16_t* actual_channel_widths =
malloc(num_channels_plus_1 * sizeof(*actual_channel_widths));
if (state->channel_frequency_starts == NULL ||
state->channel_weight_starts == NULL || state->channel_widths == NULL ||
center_mel_freqs == NULL || actual_channel_starts == NULL ||
actual_channel_widths == NULL) {
free(center_mel_freqs);
free(actual_channel_starts);
free(actual_channel_widths);
fprintf(stderr, "Failed to allocate channel buffers\n");
return 0;
}
CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit,
config->upper_band_limit, center_mel_freqs);
// Always exclude DC.
const float hz_per_sbin = 0.5 * sample_rate / ((float)spectrum_size - 1);
state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin;
state->end_index = 0; // Initialized to zero here, but actually set below.
// For each channel, we need to figure out what frequencies belong to it, and
// how much padding we need to add so that we can efficiently multiply the
// weights and unweights for accumulation. To simplify the multiplication
// logic, all channels will have some multiplication to do (even if there are
// no frequencies that accumulate to that channel) - they will be directed to
// a set of zero weights.
int chan_freq_index_start = state->start_index;
int weight_index_start = 0;
int needs_zeros = 0;
int chan;
for (chan = 0; chan < num_channels_plus_1; ++chan) {
// Keep jumping frequencies until we overshoot the bound on this channel.
int freq_index = chan_freq_index_start;
while (FreqToMel((freq_index)*hz_per_sbin) <= center_mel_freqs[chan]) {
++freq_index;
}
const int width = freq_index - chan_freq_index_start;
actual_channel_starts[chan] = chan_freq_index_start;
actual_channel_widths[chan] = width;
if (width == 0) {
// This channel doesn't actually get anything from the frequencies, it's
// always zero. We need then to insert some 'zero' weights into the
// output, and just redirect this channel to do a single multiplication at
// this point. For simplicity, the zeros are placed at the beginning of
// the weights arrays, so we have to go and update all the other
// weight_starts to reflect this shift (but only once).
state->channel_frequency_starts[chan] = 0;
state->channel_weight_starts[chan] = 0;
state->channel_widths[chan] = kFilterbankChannelBlockSize;
if (!needs_zeros) {
needs_zeros = 1;
int j;
for (j = 0; j < chan; ++j) {
state->channel_weight_starts[j] += kFilterbankChannelBlockSize;
}
weight_index_start += kFilterbankChannelBlockSize;
}
} else {
// How far back do we need to go to ensure that we have the proper
// alignment?
const int aligned_start =
(chan_freq_index_start / index_alignment) * index_alignment;
const int aligned_width = (chan_freq_index_start - aligned_start + width);
const int padded_width =
(((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) *
kFilterbankChannelBlockSize;
state->channel_frequency_starts[chan] = aligned_start;
state->channel_weight_starts[chan] = weight_index_start;
state->channel_widths[chan] = padded_width;
weight_index_start += padded_width;
}
chan_freq_index_start = freq_index;
}
// Allocate the two arrays to store the weights - weight_index_start contains
// the index of what would be the next set of weights that we would need to
// add, so that's how many weights we need to allocate.
state->weights = calloc(weight_index_start, sizeof(*state->weights));
state->unweights = calloc(weight_index_start, sizeof(*state->unweights));
// If the alloc failed, we also need to nuke the arrays.
if (state->weights == NULL || state->unweights == NULL) {
free(center_mel_freqs);
free(actual_channel_starts);
free(actual_channel_widths);
fprintf(stderr, "Failed to allocate weights or unweights\n");
return 0;
}
// Next pass, compute all the weights. Since everything has been memset to
// zero, we only need to fill in the weights that correspond to some frequency
// for a channel.
const float mel_low = FreqToMel(config->lower_band_limit);
for (chan = 0; chan < num_channels_plus_1; ++chan) {
int frequency = actual_channel_starts[chan];
const int num_frequencies = actual_channel_widths[chan];
const int frequency_offset =
frequency - state->channel_frequency_starts[chan];
const int weight_start = state->channel_weight_starts[chan];
const float denom_val = (chan == 0) ? mel_low : center_mel_freqs[chan - 1];
int j;
for (j = 0; j < num_frequencies; ++j, ++frequency) {
const float weight =
(center_mel_freqs[chan] - FreqToMel(frequency * hz_per_sbin)) /
(center_mel_freqs[chan] - denom_val);
// Make the float into an integer for the weights (and unweights).
const int weight_index = weight_start + frequency_offset + j;
QuantizeFilterbankWeights(weight, state->weights + weight_index,
state->unweights + weight_index);
}
if (frequency > state->end_index) {
state->end_index = frequency;
}
}
free(center_mel_freqs);
free(actual_channel_starts);
free(actual_channel_widths);
if (state->end_index >= spectrum_size) {
fprintf(stderr, "Filterbank end_index is above spectrum size.\n");
return 0;
}
return 1;
}
void FilterbankFreeStateContents(struct FilterbankState* state) {
free(state->channel_frequency_starts);
free(state->channel_weight_starts);
free(state->channel_widths);
free(state->weights);
free(state->unweights);
free(state->work);
}
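A standalone restatement of the mel mapping used above, assuming only the C++ standard library; it mirrors FreqToMel and the channel-edge spacing in CalculateCenterFrequencies with the default config values, but it is not the library code itself.
#include <cmath>
#include <cstdio>
#include <vector>
// Same formula as FreqToMel above: 1127 * ln(1 + f / 700).
static float FreqToMel(float freq) { return 1127.0f * std::log1p(freq / 700.0f); }
int main() {
  // Defaults from FilterbankFillConfigWithDefaults; FilterbankPopulateState
  // computes num_channels + 1 channel edges.
  const int num_edges = 32 + 1;
  const float mel_low = FreqToMel(125.0f);
  const float mel_spacing = (FreqToMel(7500.0f) - mel_low) / num_edges;
  std::vector<float> centers(num_edges);
  for (int i = 0; i < num_edges; ++i) {
    centers[i] = mel_low + mel_spacing * (i + 1);  // Upper edge of channel i.
  }
  std::printf("edges run from %.1f to %.1f mel\n", centers.front(), centers.back());
  return 0;
}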

View File

@@ -1,50 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
#ifdef __cplusplus
extern "C" {
#endif
struct FilterbankConfig {
// number of frequency channel buckets for filterbank
int num_channels;
// maximum frequency to include
float upper_band_limit;
// minimum frequency to include
float lower_band_limit;
// unused
int output_scale_shift;
};
// Fills the FilterbankConfig with "sane" defaults.
void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config);
// Allocates any buffers.
int FilterbankPopulateState(const struct FilterbankConfig* config,
struct FilterbankState* state, int sample_rate,
int spectrum_size);
// Frees any allocated buffers.
void FilterbankFreeStateContents(struct FilterbankState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_

View File

@@ -1,72 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
const int16_t* samples,
size_t num_samples,
size_t* num_samples_read) {
struct FrontendOutput output;
output.values = NULL;
output.size = 0;
// Try to apply the window - if it fails, return and wait for more data.
if (!WindowProcessSamples(&state->window, samples, num_samples,
num_samples_read)) {
return output;
}
// Apply the FFT to the window's output (and scale it so that the fixed point
// FFT can have as much resolution as possible).
int input_shift =
15 - MostSignificantBit32(state->window.max_abs_output_value);
FftCompute(&state->fft, state->window.output, input_shift);
// We can re-use the fft's output buffer to hold the energy.
int32_t* energy = (int32_t*)state->fft.output;
FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output,
energy);
FilterbankAccumulateChannels(&state->filterbank, energy);
uint32_t* scaled_filterbank = FilterbankSqrt(&state->filterbank, input_shift);
// Apply noise reduction.
NoiseReductionApply(&state->noise_reduction, scaled_filterbank);
if (state->pcan_gain_control.enable_pcan) {
PcanGainControlApply(&state->pcan_gain_control, scaled_filterbank);
}
// Apply the log and scale.
int correction_bits =
MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
uint16_t* logged_filterbank =
LogScaleApply(&state->log_scale, scaled_filterbank,
state->filterbank.num_channels, correction_bits);
output.size = state->filterbank.num_channels;
output.values = logged_filterbank;
return output;
}
void FrontendReset(struct FrontendState* state) {
WindowReset(&state->window);
FftReset(&state->fft);
FilterbankReset(&state->filterbank);
NoiseReductionReset(&state->noise_reduction);
}

View File

@@ -1,64 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_
#include <stdint.h>
#include <stdlib.h>
#include "tensorflow/lite/experimental/microfrontend/lib/fft.h"
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h"
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
#ifdef __cplusplus
extern "C" {
#endif
struct FrontendState {
struct WindowState window;
struct FftState fft;
struct FilterbankState filterbank;
struct NoiseReductionState noise_reduction;
struct PcanGainControlState pcan_gain_control;
struct LogScaleState log_scale;
};
struct FrontendOutput {
const uint16_t* values;
size_t size;
};
// Main entry point to processing frontend samples. Updates num_samples_read to
// contain the number of samples that have been consumed from the input array.
// Returns a struct containing the generated output. If not enough samples were
// added to generate a feature vector, the returned size will be 0 and the
// values pointer will be NULL. Note that the output pointer will be invalidated
// as soon as FrontendProcessSamples is called again, so copy the contents
// elsewhere if you need to use them later.
struct FrontendOutput FrontendProcessSamples(struct FrontendState* state,
const int16_t* samples,
size_t num_samples,
size_t* num_samples_read);
void FrontendReset(struct FrontendState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_

View File

@@ -1,85 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
#include <stdio.h>
#include <string.h>
#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
void FrontendFillConfigWithDefaults(struct FrontendConfig* config) {
WindowFillConfigWithDefaults(&config->window);
FilterbankFillConfigWithDefaults(&config->filterbank);
NoiseReductionFillConfigWithDefaults(&config->noise_reduction);
PcanGainControlFillConfigWithDefaults(&config->pcan_gain_control);
LogScaleFillConfigWithDefaults(&config->log_scale);
}
int FrontendPopulateState(const struct FrontendConfig* config,
struct FrontendState* state, int sample_rate) {
memset(state, 0, sizeof(*state));
if (!WindowPopulateState(&config->window, &state->window, sample_rate)) {
fprintf(stderr, "Failed to populate window state\n");
return 0;
}
if (!FftPopulateState(&state->fft, state->window.size)) {
fprintf(stderr, "Failed to populate fft state\n");
return 0;
}
FftInit(&state->fft);
if (!FilterbankPopulateState(&config->filterbank, &state->filterbank,
sample_rate, state->fft.fft_size / 2 + 1)) {
fprintf(stderr, "Failed to populate filterbank state\n");
return 0;
}
if (!NoiseReductionPopulateState(&config->noise_reduction,
&state->noise_reduction,
state->filterbank.num_channels)) {
fprintf(stderr, "Failed to populate noise reduction state\n");
return 0;
}
int input_correction_bits =
MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2);
if (!PcanGainControlPopulateState(
&config->pcan_gain_control, &state->pcan_gain_control,
state->noise_reduction.estimate, state->filterbank.num_channels,
state->noise_reduction.smoothing_bits, input_correction_bits)) {
fprintf(stderr, "Failed to populate pcan gain control state\n");
return 0;
}
if (!LogScalePopulateState(&config->log_scale, &state->log_scale)) {
fprintf(stderr, "Failed to populate log scale state\n");
return 0;
}
FrontendReset(state);
// All good, return a true value.
return 1;
}
void FrontendFreeStateContents(struct FrontendState* state) {
WindowFreeStateContents(&state->window);
FftFreeStateContents(&state->fft);
FilterbankFreeStateContents(&state->filterbank);
NoiseReductionFreeStateContents(&state->noise_reduction);
PcanGainControlFreeStateContents(&state->pcan_gain_control);
}

View File

@@ -1,52 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
#ifdef __cplusplus
extern "C" {
#endif
struct FrontendConfig {
struct WindowConfig window;
struct FilterbankConfig filterbank;
struct NoiseReductionConfig noise_reduction;
struct PcanGainControlConfig pcan_gain_control;
struct LogScaleConfig log_scale;
};
// Fills the FrontendConfig with "sane" defaults.
void FrontendFillConfigWithDefaults(struct FrontendConfig* config);
// Allocates any buffers.
int FrontendPopulateState(const struct FrontendConfig* config,
struct FrontendState* state, int sample_rate);
// Frees any allocated buffers.
void FrontendFreeStateContents(struct FrontendState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_
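A sketch of the end-to-end feature-extraction loop implied by frontend.h and frontend_util.h above, assuming 16 kHz int16 audio; the function name, buffer handling, and printf are illustrative only.
#include <cstdio>
#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h"
#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h"
int ExtractFeatures(const int16_t* audio, size_t num_samples) {
  struct FrontendConfig config;
  struct FrontendState state;
  FrontendFillConfigWithDefaults(&config);
  if (!FrontendPopulateState(&config, &state, /*sample_rate=*/16000)) {
    return 0;
  }
  while (num_samples > 0) {
    size_t num_read = 0;
    struct FrontendOutput output =
        FrontendProcessSamples(&state, audio, num_samples, &num_read);
    audio += num_read;
    num_samples -= num_read;
    // output.size stays 0 until enough samples have accumulated for one frame;
    // output.values is invalidated by the next call, so consume it now.
    if (output.size > 0) {
      std::printf("frame with %zu filterbank channels\n", output.size);
    }
  }
  FrontendFreeStateContents(&state);
  return 1;
}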

View File

@@ -1,48 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
// This header file should be included in all variants of kiss_fft_$type.{h,cc}
// so that their sub-included source files do not mistakenly wrap libc header
// files within their kissfft_$type namespaces.
// E.g., this header avoids kiss_fft_int16.h containing:
// namespace kiss_fft_int16 {
// #include "kiss_fft.h"
// }
// where kiss_fft.h contains:
// #include <math.h>
//
// TRICK: By including the following header files here, their preprocessor
// header guards prevent them being re-defined inside of the kiss_fft_$type
// namespaces declared within the kiss_fft_$type.{h,cc} sources.
// Note that the original kiss_fft*.h files are untouched since they
// may be used in libraries that include them directly.
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef FIXED_POINT
#include <sys/types.h>
#endif
#ifdef USE_SIMD
#include <xmmintrin.h>
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_
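A minimal, generic illustration of the header-guard trick described above; the namespace name is made up and this snippet is not part of the library.
#include <math.h>      // First inclusion: the libc include guard is now defined at global scope.
namespace demo_fixed16 {
#include <math.h>      // Second inclusion is a no-op thanks to the guard, so nothing
                       // from libc gets re-declared inside the namespace.
inline double root2() { return sqrt(2.0); }  // Unqualified lookup still finds ::sqrt.
}  // namespace demo_fixed16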

View File

@@ -1,8 +0,0 @@
#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h"
#define FIXED_POINT 16
namespace kissfft_fixed16 {
#include "kiss_fft.c"
#include "tools/kiss_fftr.c"
} // namespace kissfft_fixed16
#undef FIXED_POINT

View File

@@ -1,33 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_
#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h"
// Wrap 16-bit kiss fft in its own namespace. Enables us to link an application
// with different kiss fft resolutions (16/32 bit integer, float, double)
// without getting a linker error.
#define FIXED_POINT 16
namespace kissfft_fixed16 {
#include "kiss_fft.h"
#include "tools/kiss_fftr.h"
} // namespace kissfft_fixed16
#undef FIXED_POINT
#undef kiss_fft_scalar
#undef KISS_FFT_H
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_

View File

@@ -1,30 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
const uint16_t kLogLut[]
#ifndef _MSC_VER
__attribute__((aligned(4)))
#endif  // _MSC_VER
= {0, 224, 442, 654, 861, 1063, 1259, 1450, 1636, 1817, 1992, 2163,
2329, 2490, 2646, 2797, 2944, 3087, 3224, 3358, 3487, 3611, 3732, 3848,
3960, 4068, 4172, 4272, 4368, 4460, 4549, 4633, 4714, 4791, 4864, 4934,
5001, 5063, 5123, 5178, 5231, 5280, 5326, 5368, 5408, 5444, 5477, 5507,
5533, 5557, 5578, 5595, 5610, 5622, 5631, 5637, 5640, 5641, 5638, 5633,
5626, 5615, 5602, 5586, 5568, 5547, 5524, 5498, 5470, 5439, 5406, 5370,
5332, 5291, 5249, 5203, 5156, 5106, 5054, 5000, 4944, 4885, 4825, 4762,
4697, 4630, 4561, 4490, 4416, 4341, 4264, 4184, 4103, 4020, 3935, 3848,
3759, 3668, 3575, 3481, 3384, 3286, 3186, 3084, 2981, 2875, 2768, 2659,
2549, 2437, 2323, 2207, 2090, 1971, 1851, 1729, 1605, 1480, 1353, 1224,
1094, 963, 830, 695, 559, 421, 282, 142, 0, 0};

View File

@@ -1,40 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Number of segments in the log lookup table. The table will be kLogSegments+1
// in length (with some padding).
#define kLogSegments 128
#define kLogSegmentsLog2 7
// Scale used by lookup table.
#define kLogScale 65536
#define kLogScaleLog2 16
#define kLogCoeff 45426
extern const uint16_t kLogLut[];
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_

View File

@@ -1,83 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h"
#define kuint16max 0x0000FFFF
// The following functions implement integer logarithms of various sizes. The
// approximation is calculated according to method described in
// www.inti.gob.ar/electronicaeinformatica/instrumentacion/utic/
// publicaciones/SPL2007/Log10-spl07.pdf
// It first calculates log2 of the input and then converts it to natural
// logarithm.
static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) {
// Part 1
int32_t frac = x - (1LL << log2x);
if (log2x < kLogScaleLog2) {
frac <<= kLogScaleLog2 - log2x;
} else {
frac >>= log2x - kLogScaleLog2;
}
// Part 2
const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2);
const uint32_t seg_unit =
(((uint32_t)1) << kLogScaleLog2) >> kLogSegmentsLog2;
const int32_t c0 = kLogLut[base_seg];
const int32_t c1 = kLogLut[base_seg + 1];
const int32_t seg_base = seg_unit * base_seg;
const int32_t rel_pos = ((c1 - c0) * (frac - seg_base)) >> kLogScaleLog2;
return frac + c0 + rel_pos;
}
static uint32_t Log(const uint32_t x, const uint32_t scale_shift) {
const uint32_t integer = MostSignificantBit32(x) - 1;
const uint32_t fraction = Log2FractionPart(x, integer);
const uint32_t log2 = (integer << kLogScaleLog2) + fraction;
const uint32_t round = kLogScale / 2;
const uint32_t loge = (((uint64_t)kLogCoeff) * log2 + round) >> kLogScaleLog2;
// Finally scale to our output scale
const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2;
return loge_scaled;
}
uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
int signal_size, int correction_bits) {
const int scale_shift = state->scale_shift;
uint16_t* output = (uint16_t*)signal;
uint16_t* ret = output;
int i;
for (i = 0; i < signal_size; ++i) {
uint32_t value = *signal++;
if (state->enable_log) {
if (correction_bits < 0) {
value >>= -correction_bits;
} else {
value <<= correction_bits;
}
if (value > 1) {
value = Log(value, scale_shift);
} else {
value = 0;
}
}
*output++ = (value < kuint16max) ? value : kuint16max;
}
return ret;
}
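A quick numeric check of the constants used above, assuming only the C++ standard library: kLogCoeff is ln(2) in Q16 (kLogScaleLog2 == 16), so Log() recovers the natural log from the fixed-point log2 with one multiply and shift.
#include <cassert>
#include <cmath>
#include <cstdint>
int main() {
  const double kLogScale = 65536.0;  // 1 << kLogScaleLog2
  const uint32_t coeff =
      static_cast<uint32_t>(std::lround(std::log(2.0) * kLogScale));
  assert(coeff == 45426);  // Matches kLogCoeff in log_lut.h above.
  // Decomposition used by Log(): ln(x) = ln(2) * log2(x).
  const double x = 1234.0;
  assert(std::fabs(std::log(x) - std::log(2.0) * std::log2(x)) < 1e-9);
  return 0;
}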

View File

@@ -1,39 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_
#include <stdint.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
struct LogScaleState {
int enable_log;
int scale_shift;
};
// Applies a fixed point logarithm to the signal and converts it to 16 bit. Note
// that the signal array will be modified.
uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
int signal_size, int correction_bits);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_

View File

@@ -1,27 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h"
void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config) {
config->enable_log = 1;
config->scale_shift = 6;
}
int LogScalePopulateState(const struct LogScaleConfig* config,
struct LogScaleState* state) {
state->enable_log = config->enable_log;
state->scale_shift = config->scale_shift;
return 1;
}

View File

@@ -1,45 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_
#include <stdint.h>
#include <stdlib.h>
#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h"
#ifdef __cplusplus
extern "C" {
#endif
struct LogScaleConfig {
// set to false (0) to disable this module
int enable_log;
// scale results by 2^(scale_shift)
int scale_shift;
};
// Populates the LogScaleConfig with "sane" default values.
void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config);
// Allocates any buffers.
int LogScalePopulateState(const struct LogScaleConfig* config,
struct LogScaleState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_

View File

@@ -1,51 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
#include <string.h>
void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) {
int i;
for (i = 0; i < state->num_channels; ++i) {
const uint32_t smoothing =
((i & 1) == 0) ? state->even_smoothing : state->odd_smoothing;
const uint32_t one_minus_smoothing = (1 << kNoiseReductionBits) - smoothing;
// Update the estimate of the noise.
const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits;
uint32_t estimate =
(((uint64_t)signal_scaled_up * smoothing) +
((uint64_t)state->estimate[i] * one_minus_smoothing)) >>
kNoiseReductionBits;
state->estimate[i] = estimate;
// Make sure that we can't get a negative value for (signal - estimate).
if (estimate > signal_scaled_up) {
estimate = signal_scaled_up;
}
const uint32_t floor =
((uint64_t)signal[i] * state->min_signal_remaining) >>
kNoiseReductionBits;
const uint32_t subtracted =
(signal_scaled_up - estimate) >> state->smoothing_bits;
const uint32_t output = subtracted > floor ? subtracted : floor;
signal[i] = output;
}
}
void NoiseReductionReset(struct NoiseReductionState* state) {
memset(state->estimate, 0, sizeof(*state->estimate) * state->num_channels);
}

View File

@@ -1,46 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_
#define kNoiseReductionBits 14
#include <stdint.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
struct NoiseReductionState {
int smoothing_bits;
uint16_t even_smoothing;
uint16_t odd_smoothing;
uint16_t min_signal_remaining;
int num_channels;
uint32_t* estimate;
};
// Removes stationary noise from each channel of the signal using a low pass
// filter.
void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal);
void NoiseReductionReset(struct NoiseReductionState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_

View File

@@ -1,45 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h"
#include <stdio.h>
void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config) {
config->smoothing_bits = 10;
config->even_smoothing = 0.025;
config->odd_smoothing = 0.06;
config->min_signal_remaining = 0.05;
}
int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
struct NoiseReductionState* state,
int num_channels) {
state->smoothing_bits = config->smoothing_bits;
state->odd_smoothing = config->odd_smoothing * (1 << kNoiseReductionBits);
state->even_smoothing = config->even_smoothing * (1 << kNoiseReductionBits);
state->min_signal_remaining =
config->min_signal_remaining * (1 << kNoiseReductionBits);
state->num_channels = num_channels;
state->estimate = calloc(state->num_channels, sizeof(*state->estimate));
if (state->estimate == NULL) {
fprintf(stderr, "Failed to alloc estimate buffer\n");
return 0;
}
return 1;
}
void NoiseReductionFreeStateContents(struct NoiseReductionState* state) {
free(state->estimate);
}
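// Illustrative usage sketch: runs the noise reduction stage over one frame.
// The signal buffer and channel count are hypothetical; in the frontend they
// come from the filterbank stage, one uint32_t energy per channel.
static void ExampleNoiseReductionUsage(uint32_t* signal, int num_channels) {
  struct NoiseReductionConfig config;
  struct NoiseReductionState state;
  NoiseReductionFillConfigWithDefaults(&config);
  if (!NoiseReductionPopulateState(&config, &state, num_channels)) {
    return;  // estimate buffer allocation failed
  }
  // Updates the per-channel noise estimate and subtracts it in place.
  NoiseReductionApply(&state, signal);
  NoiseReductionReset(&state);  // clears the running noise estimate
  NoiseReductionFreeStateContents(&state);
}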

View File

@@ -1,50 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_
#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h"
#ifdef __cplusplus
extern "C" {
#endif
struct NoiseReductionConfig {
// scale the signal up by 2^(smoothing_bits) before reduction
int smoothing_bits;
// smoothing coefficient for even-numbered channels
float even_smoothing;
// smoothing coefficient for odd-numbered channels
float odd_smoothing;
// fraction of signal to preserve (1.0 disables this module)
float min_signal_remaining;
};
// Populates the NoiseReductionConfig with "sane" default values.
void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config);
// Allocates any buffers.
int NoiseReductionPopulateState(const struct NoiseReductionConfig* config,
struct NoiseReductionState* state,
int num_channels);
// Frees any allocated buffers.
void NoiseReductionFreeStateContents(struct NoiseReductionState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_

View File

@@ -1,56 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
#include "tensorflow/lite/experimental/microfrontend/lib/bits.h"
int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) {
if (x <= 2) {
return lut[x];
}
const int16_t interval = MostSignificantBit32(x);
lut += 4 * interval - 6;
const int16_t frac =
((interval < 11) ? (x << (11 - interval)) : (x >> (interval - 11))) &
0x3FF;
int32_t result = ((int32_t)lut[2] * frac) >> 5;
result += (int32_t)((uint32_t)lut[1] << 5);
result *= frac;
result = (result + (1 << 14)) >> 15;
result += lut[0];
return (int16_t)result;
}
uint32_t PcanShrink(const uint32_t x) {
if (x < (2 << kPcanSnrBits)) {
return (x * x) >> (2 + 2 * kPcanSnrBits - kPcanOutputBits);
} else {
return (x >> (kPcanSnrBits - kPcanOutputBits)) - (1 << kPcanOutputBits);
}
}
void PcanGainControlApply(struct PcanGainControlState* state,
uint32_t* signal) {
int i;
for (i = 0; i < state->num_channels; ++i) {
const uint32_t gain =
WideDynamicFunction(state->noise_estimate[i], state->gain_lut);
const uint32_t snr = ((uint64_t)signal[i] * gain) >> state->snr_shift;
signal[i] = PcanShrink(snr);
}
}

View File

@@ -1,47 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_
#include <stdint.h>
#include <stdlib.h>
#define kPcanSnrBits 12
#define kPcanOutputBits 6
#ifdef __cplusplus
extern "C" {
#endif
// Details at https://research.google/pubs/pub45911.pdf
struct PcanGainControlState {
int enable_pcan;
uint32_t* noise_estimate;
int num_channels;
int16_t* gain_lut;
int32_t snr_shift;
};
int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut);
uint32_t PcanShrink(const uint32_t x);
void PcanGainControlApply(struct PcanGainControlState* state, uint32_t* signal);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_

View File

@@ -1,92 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h"
#include <math.h>
#include <stdio.h>
#define kint16max 0x00007FFF
void PcanGainControlFillConfigWithDefaults(
struct PcanGainControlConfig* config) {
config->enable_pcan = 0;
config->strength = 0.95;
config->offset = 80.0;
config->gain_bits = 21;
}
int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
int32_t input_bits, uint32_t x) {
const float x_as_float = ((float)x) / ((uint32_t)1 << input_bits);
const float gain_as_float =
((uint32_t)1 << config->gain_bits) *
powf(x_as_float + config->offset, -config->strength);
if (gain_as_float > kint16max) {
return kint16max;
}
return (int16_t)(gain_as_float + 0.5f);
}
int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
struct PcanGainControlState* state,
uint32_t* noise_estimate,
const int num_channels,
const uint16_t smoothing_bits,
const int32_t input_correction_bits) {
state->enable_pcan = config->enable_pcan;
if (!state->enable_pcan) {
return 1;
}
state->noise_estimate = noise_estimate;
state->num_channels = num_channels;
state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t));
if (state->gain_lut == NULL) {
fprintf(stderr, "Failed to allocate gain LUT\n");
return 0;
}
state->snr_shift = config->gain_bits - input_correction_bits - kPcanSnrBits;
const int32_t input_bits = smoothing_bits - input_correction_bits;
state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0);
state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1);
state->gain_lut -= 6;
int interval;
for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
const uint32_t x0 = (uint32_t)1 << (interval - 1);
const uint32_t x1 = x0 + (x0 >> 1);
const uint32_t x2 =
(interval == kWideDynamicFunctionBits) ? x0 + (x0 - 1) : 2 * x0;
const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0);
const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1);
const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2);
const int32_t diff1 = (int32_t)y1 - y0;
const int32_t diff2 = (int32_t)y2 - y0;
const int32_t a1 = 4 * diff1 - diff2;
const int32_t a2 = diff2 - a1;
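// Here a1 and a2 are chosen so that y0 + a1 * t + a2 * t^2, with
// t = frac / 1024 as computed in WideDynamicFunction, reproduces y0, y1 and
// y2 at t = 0, 0.5 and 1. In other words, each interval of the gain curve is
// approximated by a quadratic through the three sampled points x0, x1 and x2.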
state->gain_lut[4 * interval] = y0;
state->gain_lut[4 * interval + 1] = (int16_t)a1;
state->gain_lut[4 * interval + 2] = (int16_t)a2;
}
state->gain_lut += 6;
return 1;
}
void PcanGainControlFreeStateContents(struct PcanGainControlState* state) {
free(state->gain_lut);
}
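// Illustrative usage sketch: builds the PCAN gain lookup table and applies
// per-channel automatic gain control. The noise_estimate buffer is normally
// the one maintained by the noise reduction stage; it and the other
// parameters here are hypothetical placeholders.
static void ExamplePcanUsage(uint32_t* signal, uint32_t* noise_estimate,
                             int num_channels, int smoothing_bits,
                             int correction_bits) {
  struct PcanGainControlConfig config;
  struct PcanGainControlState state;
  PcanGainControlFillConfigWithDefaults(&config);
  config.enable_pcan = 1;  // the defaults leave the module disabled
  if (!PcanGainControlPopulateState(&config, &state, noise_estimate,
                                    num_channels, smoothing_bits,
                                    correction_bits)) {
    return;  // gain LUT allocation failed
  }
  PcanGainControlApply(&state, signal);  // rescales each channel in place
  PcanGainControlFreeStateContents(&state);
}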

View File

@@ -1,57 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_
#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h"
#define kWideDynamicFunctionBits 32
#define kWideDynamicFunctionLUTSize (4 * kWideDynamicFunctionBits - 3)
#ifdef __cplusplus
extern "C" {
#endif
struct PcanGainControlConfig {
// set to false (0) to disable this module
int enable_pcan;
// gain normalization exponent (0.0 disables, 1.0 full strength)
float strength;
// positive value added in the normalization denominator
float offset;
// number of fractional bits in the gain
int gain_bits;
};
void PcanGainControlFillConfigWithDefaults(
struct PcanGainControlConfig* config);
int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config,
int32_t input_bits, uint32_t x);
int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
struct PcanGainControlState* state,
uint32_t* noise_estimate,
const int num_channels,
const uint16_t smoothing_bits,
const int32_t input_correction_bits);
void PcanGainControlFreeStateContents(struct PcanGainControlState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_

View File

@@ -1,70 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
#include <string.h>
int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
size_t num_samples, size_t* num_samples_read) {
const int size = state->size;
// Copy samples from the samples buffer over to our local input.
size_t max_samples_to_copy = state->size - state->input_used;
if (max_samples_to_copy > num_samples) {
max_samples_to_copy = num_samples;
}
memcpy(state->input + state->input_used, samples,
max_samples_to_copy * sizeof(*samples));
*num_samples_read = max_samples_to_copy;
state->input_used += max_samples_to_copy;
if (state->input_used < state->size) {
// We don't have enough samples to compute a window.
return 0;
}
// Apply the window to the input.
const int16_t* coefficients = state->coefficients;
const int16_t* input = state->input;
int16_t* output = state->output;
int i;
int16_t max_abs_output_value = 0;
for (i = 0; i < size; ++i) {
int16_t new_value =
(((int32_t)*input++) * *coefficients++) >> kFrontendWindowBits;
*output++ = new_value;
if (new_value < 0) {
new_value = -new_value;
}
if (new_value > max_abs_output_value) {
max_abs_output_value = new_value;
}
}
// Shuffle the input down by the step size, and update how much we have used.
memmove(state->input, state->input + state->step,
sizeof(*state->input) * (state->size - state->step));
state->input_used -= state->step;
state->max_abs_output_value = max_abs_output_value;
// Indicate that the output buffer is valid for the next stage.
return 1;
}
void WindowReset(struct WindowState* state) {
memset(state->input, 0, state->size * sizeof(*state->input));
memset(state->output, 0, state->size * sizeof(*state->output));
state->input_used = 0;
state->max_abs_output_value = 0;
}

View File

@@ -1,49 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_
#include <stdint.h>
#include <stdlib.h>
#define kFrontendWindowBits 12
#ifdef __cplusplus
extern "C" {
#endif
struct WindowState {
size_t size;
int16_t* coefficients;
size_t step;
int16_t* input;
size_t input_used;
int16_t* output;
int16_t max_abs_output_value;
};
// Applies a window to the samples coming in, stepping forward at the given
// rate.
int WindowProcessSamples(struct WindowState* state, const int16_t* samples,
size_t num_samples, size_t* num_samples_read);
void WindowReset(struct WindowState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_

View File

@@ -1,73 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Some platforms don't have M_PI
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
void WindowFillConfigWithDefaults(struct WindowConfig* config) {
config->size_ms = 25;
config->step_size_ms = 10;
}
int WindowPopulateState(const struct WindowConfig* config,
struct WindowState* state, int sample_rate) {
state->size = config->size_ms * sample_rate / 1000;
state->step = config->step_size_ms * sample_rate / 1000;
state->coefficients = malloc(state->size * sizeof(*state->coefficients));
if (state->coefficients == NULL) {
fprintf(stderr, "Failed to allocate window coefficients\n");
return 0;
}
// Populate the window values.
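// This is a Hann (raised-cosine) window, 0.5 - 0.5 * cos(2 * pi * (i + 0.5) / size),
// scaled by 2^kFrontendWindowBits and rounded to the int16_t coefficient array.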
const float arg = M_PI * 2.0 / ((float)state->size);
int i;
for (i = 0; i < state->size; ++i) {
float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5)));
// Scale it to fixed point and round it.
state->coefficients[i] =
floor(float_value * (1 << kFrontendWindowBits) + 0.5);
}
state->input_used = 0;
state->input = malloc(state->size * sizeof(*state->input));
if (state->input == NULL) {
fprintf(stderr, "Failed to allocate window input\n");
return 0;
}
state->output = malloc(state->size * sizeof(*state->output));
if (state->output == NULL) {
fprintf(stderr, "Failed to allocate window output\n");
return 0;
}
return 1;
}
void WindowFreeStateContents(struct WindowState* state) {
free(state->coefficients);
free(state->input);
free(state->output);
}
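// Illustrative usage sketch: streams 16-bit audio through the window stage
// using the default 25 ms / 10 ms framing. The sample buffer, its length and
// the 16 kHz rate are hypothetical placeholders.
static void ExampleWindowUsage(const int16_t* samples, size_t num_samples) {
  struct WindowConfig config;
  struct WindowState state;
  WindowFillConfigWithDefaults(&config);
  if (!WindowPopulateState(&config, &state, 16000)) {
    return;  // buffer allocation failed
  }
  while (num_samples > 0) {
    size_t num_samples_read = 0;
    if (WindowProcessSamples(&state, samples, num_samples,
                             &num_samples_read)) {
      // state.output now holds one windowed frame of state.size values; the
      // frontend would hand it to the FFT stage at this point.
    }
    samples += num_samples_read;
    num_samples -= num_samples_read;
  }
  WindowFreeStateContents(&state);
}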

View File

@@ -1,45 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_
#include "tensorflow/lite/experimental/microfrontend/lib/window.h"
#ifdef __cplusplus
extern "C" {
#endif
struct WindowConfig {
// length of window frame in milliseconds
size_t size_ms;
// length of step for next frame in milliseconds
size_t step_size_ms;
};
// Populates the WindowConfig with "sane" default values.
void WindowFillConfigWithDefaults(struct WindowConfig* config);
// Allocates any buffers.
int WindowPopulateState(const struct WindowConfig* config,
struct WindowState* state, int sample_rate);
// Frees any allocated buffers.
void WindowFreeStateContents(struct WindowState* state);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_

View File

@@ -1,122 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
#include <cstdint>
#include "tensorflow/lite/kernels/op_macros.h"
#ifndef TFLITE_DCHECK
#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_EQ
#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_NE
#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_GE
#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_GT
#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_LE
#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
#ifndef TFLITE_DCHECK_LT
#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
#endif
// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
#ifndef TFLITE_CHECK
#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_EQ
#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_NE
#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_GE
#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_GT
#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_LE
#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TFLITE_CHECK_LT
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
#endif
#ifndef TF_LITE_STATIC_MEMORY
// TODO(b/162019032): Consider removing these type-aliases.
using int8 = std::int8_t;
using uint8 = std::uint8_t;
using int16 = std::int16_t;
using uint16 = std::uint16_t;
using int32 = std::int32_t;
using uint32 = std::uint32_t;
#endif // !defined(TF_LITE_STATIC_MEMORY)
// Allow for cross-compiler usage of function signatures - currently used for
// specifying named RUY profiler regions in templated methods.
#if defined(_MSC_VER)
#define TFLITE_PRETTY_FUNCTION __FUNCSIG__
#elif defined(__GNUC__)
#define TFLITE_PRETTY_FUNCTION __PRETTY_FUNCTION__
#else
#define TFLITE_PRETTY_FUNCTION __func__
#endif
// TFLITE_DEPRECATED()
//
// Duplicated from absl/base/macros.h to avoid pulling in that library.
// Marks deprecated class, struct, enum, function, method, and variable
// declarations. The macro argument is used as a custom diagnostic message (e.g.
// suggestion of a better alternative).
//
// Example:
//
// class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
// TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
//
// Every usage of a deprecated entity will trigger a warning when compiled with
// clang's `-Wdeprecated-declarations` option. This option is turned off by
// default, but the warnings will be reported by clang-tidy.
#if defined(__clang__) && __cplusplus >= 201103L
#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
#endif
#ifndef TFLITE_DEPRECATED
#define TFLITE_DEPRECATED(message)
#endif
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_

View File

@@ -1,40 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
#include <cmath>
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__)
#define TF_LITE_GLOBAL_STD_PREFIX
#else
#define TF_LITE_GLOBAL_STD_PREFIX std
#endif
#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \
template <class T> \
inline T tf_name(const T x) { \
return TF_LITE_GLOBAL_STD_PREFIX::std_name(x); \
}
DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);
DECLARE_STD_GLOBAL_SWITCH1(TfLiteExpm1, expm1);
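// For example, DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round) expands to:
//   template <class T> inline T TfLiteRound(const T x) { return std::round(x); }
// (or ::round(x) on platforms where TF_LITE_GLOBAL_STD_PREFIX is empty).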
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_

View File

@@ -1,35 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
#include <cmath>
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
inline float TfLiteMax(const float& x, const float& y) {
return std::max(x, y);
}
#else
template <class T>
inline T TfLiteMax(const T& x, const T& y) {
return std::fmax(x, y);
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_

View File

@@ -1,35 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
#include <cmath>
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
inline float TfLiteMin(const float& x, const float& y) {
return std::min(x, y);
}
#else
template <class T>
inline T TfLiteMin(const T& x, const T& y) {
return std::fmin(x, y);
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_

View File

@@ -1,20 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
// TFLM does not need to utilize any Neon optimizations.
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_

View File

@@ -1,122 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
#include <vector>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
inline RuntimeShape GetTensorShape(std::vector<int32_t> data) {
return RuntimeShape(data.size(), data.data());
}
// A list of tensors in a format that can be used by kernels like split and
// concatenation.
template <typename T>
class VectorOfTensors {
public:
// Build with the tensors in 'tensor_list'.
VectorOfTensors(const TfLiteContext& context,
const TfLiteIntArray& tensor_list) {
int num_tensors = tensor_list.size;
all_data_.reserve(num_tensors);
all_shape_.reserve(num_tensors);
all_shape_ptr_.reserve(num_tensors);
for (int i = 0; i < num_tensors; ++i) {
TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
all_data_.push_back(GetTensorData<T>(t));
all_shape_.push_back(GetTensorShape(t));
}
// Taking a pointer into a std::vector is only OK if the vector is never
// modified afterwards, so we fully populate all_shape_ in the previous loop
// and only then take pointers to its elements here.
for (int i = 0; i < num_tensors; ++i) {
all_shape_ptr_.push_back(&all_shape_[i]);
}
}
// Return a pointer to the data pointers of all tensors in the list. For
// example:
// float* const* f = v.data();
// f[0][1] is the second element of the first tensor.
T* const* data() const { return all_data_.data(); }
// Return a pointer to the shape pointers of all tensors in the list. For
// example:
//   const RuntimeShape* const* d = v.shapes();
//   d[1] holds the dimensions of the second tensor in the list.
const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }
private:
std::vector<T*> all_data_;
std::vector<RuntimeShape> all_shape_;
std::vector<RuntimeShape*> all_shape_ptr_;
};
// A list of quantized tensors in a format that can be used by kernels like
// split and concatenation.
class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t> {
public:
// Build with the tensors in 'tensor_list'.
VectorOfQuantizedTensors(const TfLiteContext& context,
const TfLiteIntArray& tensor_list)
: VectorOfTensors<uint8_t>(context, tensor_list) {
for (int i = 0; i < tensor_list.size; ++i) {
TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
zero_point_.push_back(t->params.zero_point);
scale_.push_back(t->params.scale);
}
}
const float* scale() const { return scale_.data(); }
const int32_t* zero_point() const { return zero_point_.data(); }
private:
std::vector<int32_t> zero_point_;
std::vector<float> scale_;
};
// Writes randomly accessed values from `input` sequentially into `output`.
template <typename T>
class SequentialTensorWriter {
public:
SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) {
input_data_ = GetTensorData<T>(input);
output_ptr_ = GetTensorData<T>(output);
}
SequentialTensorWriter(const T* input_data, T* output_data)
: input_data_(input_data), output_ptr_(output_data) {}
void Write(int position) { *output_ptr_++ = input_data_[position]; }
void WriteN(int position, int len) {
memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
output_ptr_ += len;
}
private:
const T* input_data_;
T* output_ptr_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_

View File

@@ -1,484 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
#include <algorithm>
#include <cmath>
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif
namespace tflite {
namespace tensor_utils {
// Multiplies a matrix with a scalar and reduces the result on each row to a
// scalar.
// Parameters:
// - matrix: matrix of size n_row * n_col
// - scalar: the scalar that is multiplied to each element in the matrix
// - n_row: the row count of the matrix
// - n_col: the column count of the matrix
// - output: the 32bit output
// Note: saturation is not needed because each int8 * int8 product is at most
// 2^14, and 2^31 / 2^14 = 2^17 = 131072 such products can be accumulated in
// 32 bits, which is larger than n_row, provided the initial output values are
// not exceptionally large.
void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
int32_t n_row, int32_t n_col,
int32_t* output);
// Add another vector for each batch in the batch vector.
template <typename T>
void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
T* batch_vector) {
for (int b = 0; b < n_batch; b++) {
for (int i = 0; i < v_size; ++i) {
batch_vector[i] += vector[i];
}
batch_vector += v_size;
}
}
// Cwise product of two vectors.
template <typename T>
inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2,
int v_size, T* result) {
for (int v = 0; v < v_size; v++) {
*result++ = *vector1++ * *vector2++;
}
}
// Cwise product of a vector and a batch-vector.
template <typename T>
inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size,
const T* batch_vector, int n_batch,
T* result) {
for (int b = 0; b < n_batch; b++) {
VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
// Update the pointers.
result += v_size;
batch_vector += v_size;
}
}
// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
// assumption here is that the result array is initialized to valid values.
template <typename T>
inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
const T* __restrict__ vector2,
int v_size,
T* __restrict__ result) {
for (int v = 0; v < v_size; v++) {
*result++ += *vector1++ * *vector2++;
}
}
// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
// operation, the assumption here is that the result array is initialized to
// valid values.
template <typename T>
inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size,
const T* batch_vector,
int n_batch, T* result) {
for (int b = 0; b < n_batch; b++) {
VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
// Update the pointers.
result += v_size;
batch_vector += v_size;
}
}
// Batch vector initialization with another vector.
template <typename T>
void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
T* batch_vector) {
for (int b = 0; b < n_batch; b++) {
std::copy_n(vector, v_size, batch_vector + b * v_size);
}
}
// Checks if all entries of vector are zero for float.
bool IsZeroVector(const float* vector, int v_size);
// Checks if all entries of vector are zero for int8.
bool IsZeroVector(const int8_t* vector, int v_size);
// Quantizes a buffer of floating point values using a symmetric quantization
// (i.e. linear quantization without an offset) to 8-bit signed integers.
// It also outputs the range (min, max) of the floating point buffer, and the
// scaling factor used to quantize the values.
void SymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min_value,
float* max_value, float* scaling_factor);
// Quantizes a buffer of floating point values using a symmetric quantization
// (i.e. linear quantization without an offset) to 8-bit signed integers.
// It uses the range (min, max) provided to the function to calculate the
// appropriate scaling factor to quantize the values.
void SymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor);
void AsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* scaling_factor,
int32_t* offset);
// Helper function to quantize floats.
// float_data_ptr input float vectors
// n_batch number of input vectors
// n_data size of a single input vector
// quantized_data_ptr (out) vector with quantized data
// scaling_factors (out) scaling factors (one per vector)
// zero_points (out) zero points (one per vector)
// do_asymmetric controls if the quantization should be asymmetric.
inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
int n_data, int8_t* quantized_data_ptr,
float* scaling_factors, int32_t* zero_points,
bool do_asymmetric) {
for (int b = 0; b < n_batch; ++b) {
const int offset = b * n_data;
if (do_asymmetric) {
tensor_utils::AsymmetricQuantizeFloats(
float_data_ptr + offset, n_data, quantized_data_ptr + offset,
&scaling_factors[b], &zero_points[b]);
} else {
float unused_min, unused_max;
tensor_utils::SymmetricQuantizeFloats(
float_data_ptr + offset, n_data, quantized_data_ptr + offset,
&unused_min, &unused_max, &scaling_factors[b]);
}
}
}
// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
// dimension composed of input vectors independent of each other). The result
// of the multiplication is accumulated to the passed result buffer.
// More specifically, for a matrix M of shape [n, i] and a batched-vector
// of shape [i, batch] it will first compute the product of shape [n, batch].
// This product will be accumulated to the result buffer.
void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
int m_cols, const float* vector,
int n_batch, float* result);
// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x4.
// This function assumes that m_cols is a multiple of the block size (4 in this
// case) so that there's no incomplete block.
void SparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result);
// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
// 1. A matrix array stores non-zero blocks of the matrix in row major.
// 2. A ledger array stores nrows groups, one group per row. Each group starts
// with an integer representing the number of non-zero blocks for the
// corresponding row and follows with column indexes of the first element
// of each non-zero block.
// This function assumes that
// 1. m_cols is a multiple of 16 so that all blocks are full blocks.
// 2. m_cols < 254 * 16 so that block index can be represented by uint8.
void SparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result);
// Same as the function above, but for values quantized using symmetric
// quantization (e.g. by calling SymmetricQuantizeFloats).
// The passed scaling_factors argument is a buffer of the quantization scaling
// factors that will be used to dequantize the products into the final result
// buffer.
// These scaling factors are the multiplication of the matrix scaling factor
// by the vector's scaling factor, one per batch (i.e. this allows quantizing
// each batch in the batch-vector matrix independently).
void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors,
const float* __restrict__ scaling_factors, int n_batch,
float* __restrict__ result);
// Same as the function above except that vector values
// are quantized with asymmetric quantization per-batch and the matrix
// is quantized per row.
void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors,
const float* __restrict__ scaling_factors, int n_batch,
float* __restrict__ result, const float* __restrict__ per_channel_scale,
const int32_t* __restrict__ input_offset);
// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x16.
// This function assumes that m_cols is a multiple of the block size (16 in this
// case) so that there's no incomplete block. Also, it assumes all offsets of
// input, output and filter are zero.
void SparseMatrixBatchVectorMultiplyAccumulate1x16(
const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
int n_batch, const int32_t input_offset, const int32_t output_multiplier,
const int32_t output_shift, const int32_t output_offset,
const int32_t output_activation_min, const int32_t output_activation_max,
int8_t* __restrict__ result);
// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
// 1. A matrix array stores non-zero blocks of the matrix in row major.
// 2. A ledger array stores nrows groups, one group per row. Each group starts
// with an integer representing the number of non-zero blocks for the
// corresponding row followed by column index of the first element of
// each non-zero block.
// This function assumes that
// 1. m_cols is a multiple of 16 so that all blocks are full blocks.
// 2. m_cols < 254 * 16 so that block index can be represented by uint8.
void SparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* __restrict__ ledger,
const int m_rows, const int m_cols, const int8_t* __restrict__ vectors,
const float* __restrict__ scaling_factors, int n_batch,
float* __restrict__ result);
// Same as the above 8, 8, 8 integer matmul except for the presence of zero
// point and non-accumulative.
// TODO(b/148688698): remove this function by folding zero point calculation in
// prepare() function.
void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input, int32_t n_cell,
int8_t* gate_output, int8_t gate_output_zp);
// Same as above but has 16 bit and 8 bit input and 8 bit output.
// Used in projection when hidden is 16bit.
void MatrixBatchVectorMultiply(const int16_t* hidden,
const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a,
int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch,
int32_t n_hidden, int32_t n_output,
int32_t output_zp, int8_t* proj_output);
// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a quantized
// vector.
// Parameters:
// - input: batch vector of size n_batch * n_input; 16 bit.
// - layer_norm_weights: the quantized layer normalization weights.
// - bias: the bias for the layer normalization.
// - layer_norm_scale_a: multiplier for scale factor.
// - layer_norm_scale_b: shift for scale factor.
// - variance_limit: the guard to make sure the inverse does not overflow.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output);
// Same as above but the internal calculation is done in float.
void ApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
const int32_t* bias, int n_batch, int n_input,
int16_t* output);
// Apply Sigmoid to a quantized vector.
// Parameters:
// - input: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
// The input is in Q3.12 format and the output is in Q0.15 format.
void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
int16_t* output);
// Same as above but the internal calculation is done in float.
void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
int16_t* output);
// Apply Tanh to a quantized vector.
// Parameters:
// - integer_bits: the integer bits of the input.
// Currently supports 0, 1, 2, 3, 4, 5, 6.
// - input: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
// The input is in Qm.15-m format and the output is in Q0.15 format.
void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output);
// Apply Tanh to a quantized vector. The internal calculation is done in float.
// - Input has 2^(integer_bits) as scale.
// - Output has Q0.15 as scale.
void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
int32_t integer_bits, int16_t* output);
// Element-wise multiplication of two quantized vectors.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - shift: the shift needed to produce the output.
// - output: the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int shift, int16_t* output);
// Element-wise multiplication of two quantized vectors.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - shift: the shift needed to produce the output.
// - output: the 8 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int shift, int8_t* output);
// Element-wise multiplication of two quantized vectors with rescaling.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - multiplier: the multiplier part of scale.
// - shift: the shift part of scale.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 8 bit output of size n_batch * n_input.
// - output_zp: the zero point of output.
// Output does not need to be initialized.
// Multiplier ("m") and shift ("s") are connected to scale ("s") with s = m *
// 2^(s - 31).
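// For example, multiplier = 1 << 30 with shift = 1 gives
// scale = 2^30 * 2^(1 - 31) = 1.0.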
void CwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output);
// Element-wise saturating addition of two quantized vectors without rescaling.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 8 bit output of size n_batch * n_input.
// Output does not need to be initialized.
void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int16_t* output);
// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
// int8_t. Parameters:
// - vector: vector of size v_size.
// - v_size: the size of the vector.
// - clipping_value: the value used for clipping.
void CwiseClipping(float* vector, const int v_size, const float clipping_value);
void CwiseClipping(int16_t* vector, const int v_size,
const int16_t clipping_value);
void CwiseClipping(int8_t* vector, const int v_size,
const int8_t clipping_value);
// Dot product of two vectors.
float VectorVectorDotProduct(const float* vector1, const float* vector2,
int v_size);
// Dot product of two batch vectors of size n_batch * v_size:
// vector1 = [x_1_1, x_1_2, ..., x_1_vsize,
// x_2_1, x_2_2, ..., x_2_vsize,
// ...
// x_nbatch_1,..., x_nbatch_vsize]
// vector2 = [y_1_1, y_1_2, ..., y_1_vsize,
// y_2_1, y_2_2, ..., y_2_vsize,
// ...
// y_nbatch_1,..., y_nbatch_vsize]
// Then result will be a vector of n_batch size starting from 'result':
// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize,
// x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize,
// ...
// x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize]
template <typename T>
inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2,
int v_size, int n_batch,
T* result) {
for (int b = 0; b < n_batch; b++) {
result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
vector1 += v_size;
vector2 += v_size;
}
}
// Same as above but input is 16bit and output is 32bit.
void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
const int16_t* vector2, int v_size,
int n_batch, int32_t* result);
// Same as above, but inputs are 16bit integer and output is 16bit integer.
void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
const int16_t* batch_vector,
int n_batch, int32_t multiplier,
int shift, int16_t* result);
// Compute "1.0f - elements of vector" (used in CIFG).
void Sub1Vector(const float* vector, int v_size, float* result);
// Compute "1.0f - elements of vector" (used in CIFG) for int16 input.
// "vector" has range [0, 32767] because it is the output of sigmoid function.
void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);
// Multiply all elements of vector with a scalar.
void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
float* result);
// Reduce-sum on a float input vector:
// input_vector: float pointer to input vector.
// output_vector: float pointer to vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
void ReductionSumVector(const float* input_vector, float* output_vector,
int output_size, int reduction_size);
// Same as above but input/output is 32 bit integer.
void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
int output_size, int reduction_size);
// Same as above but input is 8 bit integer.
void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
int output_size, int reduction_size);
// Layer norm for each batch.
void MeanStddevNormalization(const float* input_vector, float* output_vector,
int v_size, int n_batch);
// Saturate Add with rescale on both inputs.
void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b, int32_t n_batch,
int32_t n_cell, int16_t* output);
} // namespace tensor_utils
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_

View File

@@ -1,416 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include <algorithm>
#include <cmath>
#include <limits>
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
namespace tflite {
namespace {
// These constants are used to manipulate the binary representation of doubles.
// Double-precision binary64 floating point format is:
// Bit | 63 | 62-52 | 51-0 |
// | Sign | Exponent | Fraction |
// To avoid 64-bit integers as much as possible, I break this into high and
// low 32-bit chunks. High is:
// Bit | 31 | 30-20 | 19-0 |
// | Sign | Exponent | High Fraction |
// Low is:
// Bit | 31-0 |
// | Low Fraction |
// We then access the components through logical bit-wise operations to
// extract the parts needed, with the positions and masks derived from the
// layout shown above.
constexpr uint64_t kSignMask = 0x8000000000000000LL;
constexpr uint64_t kExponentMask = 0x7ff0000000000000LL;
constexpr int32_t kExponentShift = 52;
constexpr int32_t kExponentBias = 1023;
constexpr uint32_t kExponentIsBadNum = 0x7ff;
constexpr uint64_t kFractionMask = 0x000fffffffc00000LL;
constexpr uint32_t kFractionShift = 22;
constexpr uint32_t kFractionRoundingMask = 0x003fffff;
constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
} // namespace
void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
int* shift) {
#if TFLITE_SINGLE_ROUNDING
// Single-rounding MultiplyByQuantizedMultiplier only supports positive
// multipliers.
// TFLITE_DCHECK(double_multiplier >= 0);
#endif
if (double_multiplier == 0.) {
*quantized_multiplier = 0;
*shift = 0;
return;
}
#ifdef TFLITE_EMULATE_FLOAT
// If we're trying to avoid the use of floating-point instructions (for
// example on microcontrollers) then use an alternative implementation
// that only requires integer and bitwise operations. To enable this, you
// need to set the define during the build process for your platform.
int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
#else // TFLITE_EMULATE_FLOAT
const double q = std::frexp(double_multiplier, shift);
auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1LL << 31)));
#endif // TFLITE_EMULATE_FLOAT
TFLITE_CHECK(q_fixed <= (1LL << 31));
if (q_fixed == (1LL << 31)) {
q_fixed /= 2;
++*shift;
}
TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
// A shift amount smaller than -31 would cause all bits to be shifted out
// and thus all results would be zero. We implement that instead with
// q_fixed==0, so as to avoid hitting issues with right-shift
// operations with shift amounts greater than 31. Note that this happens
// roughly when abs(double_multiplier) < 2^-31 and the present handling means
// that we're effectively flushing tiny double_multiplier's to zero.
// We could conceivably handle values in the range (roughly) [32, 63]
// as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
// the present handling is just doing 'flush denormals to zero'. We could
// reconsider and actually generate nonzero denormals if a need arises.
if (*shift < -31) {
*shift = 0;
q_fixed = 0;
}
#if TFLITE_SINGLE_ROUNDING
// Single-rounding MultiplyByQuantizedMultiplier doesn't support a shift > 30,
// so saturate it.
if (*shift > 30) {
*shift = 30;
q_fixed = (1LL << 31) - 1;
}
#endif
*quantized_multiplier = static_cast<int32_t>(q_fixed);
}
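// Worked example (illustration only; ExampleQuantizeMultiplier is a
// hypothetical helper, not part of TFLite): 0.75 already lies in [0.5, 1), so
// it decomposes to the Q0.31 significand round(0.75 * 2^31) == 0x60000000
// with shift == 0. A multiplier of 1.5 gives the same significand with
// shift == 1.
void ExampleQuantizeMultiplier() {
  int32_t significand = 0;
  int shift = 0;
  QuantizeMultiplier(0.75, &significand, &shift);
  TFLITE_CHECK_EQ(significand, 0x60000000);
  TFLITE_CHECK_EQ(shift, 0);
}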
void QuantizeMultiplierGreaterThanOne(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift) {
TFLITE_CHECK_GT(double_multiplier, 1.);
QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift);
TFLITE_CHECK_GE(*left_shift, 0);
}
void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift) {
TFLITE_CHECK_LT(double_multiplier, 1.);
TFLITE_CHECK_GT(double_multiplier, 0.);
int shift;
QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
TFLITE_CHECK_LE(shift, 0);
*left_shift = shift;
}
int64_t IntegerFrExp(double input, int* shift) {
// Make sure our assumptions about the double layout hold.
TFLITE_CHECK_EQ(8, sizeof(double));
// We want to access the bits of the input double value directly, which is
// tricky to do safely, so use a union to handle the casting.
union {
double double_value;
uint64_t double_as_uint;
} cast_union;
cast_union.double_value = input;
const uint64_t u = cast_union.double_as_uint;
// If the bitfield is all zeros apart from the sign bit, this is a normalized
// zero value, so return standard values for this special case.
if ((u & ~kSignMask) == 0) {
*shift = 0;
return 0;
}
// Deal with NaNs and Infs, which are always indicated with a fixed pattern in
// the exponent, and distinguished by whether the fractions are zero or
// non-zero.
const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift);
if (exponent_part == kExponentIsBadNum) {
*shift = std::numeric_limits<int>::max();
if (u & kFractionMask) {
// NaN, so just return zero (with the exponent set to INT_MAX).
return 0;
} else {
// Infinity, so return +/- INT_MAX.
if (u & kSignMask) {
return std::numeric_limits<int64_t>::min();
} else {
return std::numeric_limits<int64_t>::max();
}
}
}
// The shift is fairly easy to extract from the high bits of the double value,
// just by masking it out and applying a bias. The std::frexp() implementation
// always returns values between 0.5 and 1.0 though, whereas the exponent
// assumes 1.0 to 2.0 is the standard range, so I add on one to match that
// interface.
*shift = (exponent_part - kExponentBias) + 1;
// There's an implicit high bit in the double format definition, so make sure
// we include that at the top, and then reconstruct the rest of the fractional
// value from the remaining fragments.
int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift);
// We're cutting off some bits at the bottom, so to exactly match the standard
// frexp implementation here we'll apply rounding by adding one to the least
// significant bit of the result if the discarded portion is over half of the
// maximum.
if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) {
fraction += 1;
}
// Negate the fraction if the sign bit was set.
if (u & kSignMask) {
fraction *= -1;
}
return fraction;
}
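// Worked example (illustration only; ExampleIntegerFrExp is a hypothetical
// helper, not part of TFLite): 1.5 is stored with an unbiased exponent of 0
// and a mantissa of 1.1b, so the reconstructed fraction is
// 0x40000000 + 0x20000000 == 0x60000000 and the returned shift is
// exponent + 1 == 1, matching 1.5 == (0x60000000 / 2^31) * 2^1.
void ExampleIntegerFrExp() {
  int shift = 0;
  const int64_t fraction = IntegerFrExp(1.5, &shift);
  TFLITE_CHECK_EQ(fraction, 0x60000000);
  TFLITE_CHECK_EQ(shift, 1);
}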
double DoubleFromFractionAndShift(int64_t fraction, int shift) {
union {
double double_value;
uint64_t double_as_uint;
} result;
// Detect NaNs and infinities.
if (shift == std::numeric_limits<int>::max()) {
if (fraction == 0) {
return std::numeric_limits<double>::quiet_NaN();
} else if (fraction > 0) {
return std::numeric_limits<double>::infinity();
} else {
return -std::numeric_limits<double>::infinity();
}
}
// Return a normalized zero for a zero fraction.
if (fraction == 0) {
result.double_as_uint = 0;
return result.double_value;
}
bool is_negative = (fraction < 0);
int64_t encoded_fraction = is_negative ? -fraction : fraction;
int64_t encoded_shift = (shift - 1);
while (encoded_fraction < 0x40000000) {
encoded_fraction *= 2;
encoded_shift -= 1;
}
while (encoded_fraction > 0x80000000) {
encoded_fraction /= 2;
encoded_shift += 1;
}
encoded_fraction -= 0x40000000;
if (encoded_shift < -1022) {
encoded_shift = -1023;
} else if (encoded_shift > 1022) {
encoded_shift = 1023;
}
encoded_shift += kExponentBias;
uint64_t encoded_sign = is_negative ? kSignMask : 0;
result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) |
(encoded_fraction << kFractionShift);
return result.double_value;
}
double IntegerDoubleMultiply(double a, double b) {
int a_shift;
const int64_t a_fraction = IntegerFrExp(a, &a_shift);
int b_shift;
const int64_t b_fraction = IntegerFrExp(b, &b_shift);
// Detect NaNs and infinities.
if (a_shift == std::numeric_limits<int>::max() ||
(b_shift == std::numeric_limits<int>::max())) {
return std::numeric_limits<double>::quiet_NaN();
}
const int result_shift = a_shift + b_shift + 1;
const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
return DoubleFromFractionAndShift(result_fraction, result_shift);
}
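// Worked example (illustration only; ExampleIntegerDoubleMultiply is a
// hypothetical helper, not part of TFLite): 1.5 decomposes to
// (0x60000000, shift 1) and 2.0 to (0x40000000, shift 2), so the raw product
// fraction is (0x60000000 * 0x40000000) >> 32 == 0x18000000 with
// shift 1 + 2 + 1 == 4, which DoubleFromFractionAndShift() renormalizes to
// exactly 3.0.
void ExampleIntegerDoubleMultiply() {
  TFLITE_CHECK_EQ(IntegerDoubleMultiply(1.5, 2.0), 3.0);
}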
int IntegerDoubleCompare(double a, double b) {
int a_shift;
const int64_t a_fraction = IntegerFrExp(a, &a_shift);
int b_shift;
const int64_t b_fraction = IntegerFrExp(b, &b_shift);
// Detect NaNs and infinities.
if (a_shift == std::numeric_limits<int>::max() ||
(b_shift == std::numeric_limits<int>::max())) {
return 1;
}
if ((a_fraction == 0) && (b_fraction < 0)) {
return 1;
} else if ((a_fraction < 0) && (b_fraction == 0)) {
return -1;
} else if (a_shift < b_shift) {
return -1;
} else if (a_shift > b_shift) {
return 1;
} else if (a_fraction < b_fraction) {
return -1;
} else if (a_fraction > b_fraction) {
return 1;
} else {
return 0;
}
}
void PreprocessSoftmaxScaling(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier, int* left_shift) {
// If the overall multiplier (input and beta) is large, then exp() of an
// input difference of 1 scaled by this will be large. In other words, we
// can cap the multiplier and know that, when it is used, the output will be
// (round to) zero wherever the input is not at the maximum value.
// If the overall scale is less than one, and input_integer_bits=0, then the
// result is double equivalent of Q0.31 (actually with more precision). Thus
// this generates a Q(input_integer_bits).(31-input_integer_bits)
// representation.
#if TFLITE_SINGLE_ROUNDING
const double max_real_multiplier = (1LL << 30) - 1.0;
#else
const double max_real_multiplier = (1LL << 31) - 1.0;
#endif
#ifdef TFLITE_EMULATE_FLOAT
const double input_beta = IntegerDoubleMultiply(beta, input_scale);
int shift;
int64_t fraction = IntegerFrExp(input_beta, &shift);
shift += (31 - input_integer_bits);
double input_beta_real_multiplier =
DoubleFromFractionAndShift(fraction, shift);
if (IntegerDoubleCompare(input_beta_real_multiplier, max_real_multiplier) >
0) {
input_beta_real_multiplier = max_real_multiplier;
}
#else // TFLITE_EMULATE_FLOAT
const double input_beta_real_multiplier =
std::min<double>(beta * input_scale * (1 << (31 - input_integer_bits)),
max_real_multiplier);
#endif // TFLITE_EMULATE_FLOAT
QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
quantized_multiplier, left_shift);
}
void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier,
int* left_shift,
int32_t* reverse_scaling_divisor,
int* reverse_scaling_left_shift) {
PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
quantized_multiplier, left_shift);
// Also calculate what amounts to the inverse scaling factor for the input.
const double real_reverse_scaling_divisor =
(1 << (31 - *left_shift)) / static_cast<double>(*quantized_multiplier);
tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor,
reverse_scaling_divisor,
reverse_scaling_left_shift);
}
int CalculateInputRadius(int input_integer_bits, int input_left_shift,
int total_signed_bits) {
#ifdef TFLITE_EMULATE_FLOAT
int64_t result = (1 << input_integer_bits) - 1;
result <<= (total_signed_bits - input_integer_bits);
result >>= input_left_shift;
return result;
#else // TFLITE_EMULATE_FLOAT
const double max_input_rescaled =
1.0 * ((1 << input_integer_bits) - 1) *
(1LL << (total_signed_bits - input_integer_bits)) /
(1LL << input_left_shift);
// Tighten bound using floor. Suppose that we could use the exact value.
// After scaling the difference, the result would be at the maximum. Thus we
// must ensure that our value has lower magnitude.
return static_cast<int>(std::floor(max_input_rescaled));
#endif // TFLITE_EMULATE_FLOAT
}
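// Worked example (illustration only; ExampleCalculateInputRadius is a
// hypothetical helper, not part of TFLite): with input_integer_bits = 4,
// input_left_shift = 20 and 31 total signed bits, the radius is
// 15 * 2^(31 - 4) / 2^20 == 1920 in both the integer and floating-point
// branches.
void ExampleCalculateInputRadius() {
  TFLITE_CHECK_EQ(CalculateInputRadius(4, 20, 31), 1920);
}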
void NudgeQuantizationRange(const float min, const float max,
const int quant_min, const int quant_max,
float* nudged_min, float* nudged_max,
float* nudged_scale) {
// This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
const float quant_min_float = static_cast<float>(quant_min);
const float quant_max_float = static_cast<float>(quant_max);
*nudged_scale = (max - min) / (quant_max_float - quant_min_float);
const float zero_point_from_min = quant_min_float - min / *nudged_scale;
uint16_t nudged_zero_point;
if (zero_point_from_min < quant_min_float) {
nudged_zero_point = static_cast<uint16_t>(quant_min);
} else if (zero_point_from_min > quant_max_float) {
nudged_zero_point = static_cast<uint16_t>(quant_max);
} else {
nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
}
*nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
*nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
}
void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
const float nudged_max, const float* input_data,
float* output_data, const float size) {
// This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
const float inv_nudged_scale = 1.0f / nudged_scale;
for (int i = 0; i < size; i++) {
const float src_val = input_data[i];
const float clamped = std::min(nudged_max, std::max(nudged_min, src_val));
const float clamped_shifted = clamped - nudged_min;
const float dst_val =
TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale +
nudged_min;
output_data[i] = dst_val;
}
}
bool CheckedLog2(const float x, int* log2_result) {
// Using TfLiteRound instead of std::round and std::log instead of
// std::log2 to work around these functions being missing in a toolchain
// used in some TensorFlow tests as of May 2018.
const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
const float x_log2_rounded = TfLiteRound(x_log2);
const float x_log2_fracpart = x_log2 - x_log2_rounded;
*log2_result = static_cast<int>(x_log2_rounded);
return std::abs(x_log2_fracpart) < 1e-3f;
}
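// Usage sketch (illustration only; ExampleCheckedLog2 is a hypothetical
// helper, not part of TFLite): 8.0f is an exact power of two, so CheckedLog2
// succeeds and reports an exponent of 3, whereas 10.0f is rejected.
void ExampleCheckedLog2() {
  int exponent = 0;
  TFLITE_CHECK(CheckedLog2(8.0f, &exponent));
  TFLITE_CHECK_EQ(exponent, 3);
  TFLITE_CHECK(!CheckedLog2(10.0f, &exponent));
}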
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
int32_t* effective_scale_significand,
int* effective_shift) {
for (size_t i = 0; i < size; ++i) {
QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i],
&effective_shift[i]);
}
}
} // namespace tflite


@@ -1,292 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
#include <cmath>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
// Given the min and max values of a float array, return
// reasonable quantization parameters to use for this array.
template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
bool narrow_range) {
const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
const T qmax = std::numeric_limits<T>::max();
const double qmin_double = qmin;
const double qmax_double = qmax;
// 0 should always be a representable value. Let's assume that the initial
// min,max range contains 0.
TFLITE_CHECK_LE(rmin, 0.);
TFLITE_CHECK_GE(rmax, 0.);
if (rmin == rmax) {
// Special case where the min,max range is a point. Should be {0}.
TFLITE_CHECK_EQ(rmin, 0.);
TFLITE_CHECK_EQ(rmax, 0.);
QuantizationParams quantization_params;
quantization_params.zero_point = 0;
quantization_params.scale = 0.;
return quantization_params;
}
// General case.
//
// First determine the scale.
const double scale = (rmax - rmin) / (qmax_double - qmin_double);
// Zero-point computation.
// First the initial floating-point computation. The zero-point can be
// determined from solving an affine equation for any known pair
// (real value, corresponding quantized value).
// We know two such pairs: (rmin, qmin) and (rmax, qmax).
// The arithmetic error on the zero point computed from either pair
// will be roughly machine_epsilon * (sum of absolute values of terms)
// so we want to use the variant that adds the smaller terms.
const double zero_point_from_min = qmin_double - rmin / scale;
const double zero_point_from_max = qmax_double - rmax / scale;
const double zero_point_from_min_error =
std::abs(qmin_double) + std::abs(rmin / scale);
const double zero_point_from_max_error =
std::abs(qmax_double) + std::abs(rmax / scale);
const double zero_point_double =
zero_point_from_min_error < zero_point_from_max_error
? zero_point_from_min
: zero_point_from_max;
// Now we need to nudge the zero point to be an integer
// (our zero points are integer, and this is motivated by the requirement
// to be able to represent the real value "0" exactly as a quantized value,
// which is required in multiple places, for example in Im2col with SAME
// padding).
T nudged_zero_point = 0;
if (zero_point_double < qmin_double) {
nudged_zero_point = qmin;
} else if (zero_point_double > qmax_double) {
nudged_zero_point = qmax;
} else {
nudged_zero_point = static_cast<T>(round(zero_point_double));
}
// The zero point should always be in the range of quantized value,
// [qmin, qmax].
TFLITE_CHECK_GE(nudged_zero_point, qmin);
TFLITE_CHECK_LE(nudged_zero_point, qmax);
// Finally, store the result nudged quantization params.
QuantizationParams quantization_params;
quantization_params.zero_point = nudged_zero_point;
quantization_params.scale = scale;
return quantization_params;
}
template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
return ChooseQuantizationParams<T>(rmin, rmax, false);
}
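// Worked example (illustration only; ExampleChooseQuantizationParams is a
// hypothetical helper, not part of TFLite): quantizing the float range
// [-1.0, 3.0] to uint8 gives scale = (3 - (-1)) / (255 - 0) == 4 / 255, and
// both candidate zero points evaluate to 63.75, which nudges to 64 so that
// the real value 0.0 maps exactly to the quantized value 64.
inline void ExampleChooseQuantizationParams() {
  const QuantizationParams params =
      ChooseQuantizationParams<uint8_t>(-1.0, 3.0);
  TFLITE_CHECK_EQ(params.zero_point, 64);
  // params.scale == 4.0 / 255.0, roughly 0.0157.
}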
// Converts a floating-point number to an integer. For all inputs x where
// static_cast<IntOut>(x) is legal according to the C++ standard, the result
// is identical to that cast (i.e. the result is x with its fractional part
// truncated whenever that is representable as IntOut).
//
// static_cast would cause undefined behavior for the following cases, which
// have well-defined behavior for this function:
//
// 1. If x is NaN, the result is zero.
//
// 2. If the truncated form of x is above the representable range of IntOut,
// the result is std::numeric_limits<IntOut>::max().
//
// 3. If the truncated form of x is below the representable range of IntOut,
// the result is std::numeric_limits<IntOut>::min().
//
// Note that cases #2 and #3 cover infinities as well as finite numbers.
//
// The range of FloatIn must include the range of IntOut, otherwise
// the results are undefined.
// TODO(sfeuz): Replace by absl::SafeCast once available.
template <class IntOut, class FloatIn>
IntOut SafeCast(FloatIn x) {
static_assert(!std::numeric_limits<FloatIn>::is_integer,
"FloatIn is integer");
static_assert(std::numeric_limits<IntOut>::is_integer,
"IntOut is not integer");
static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");
// Special case NaN, for which the logic below doesn't work.
if (std::isnan(x)) {
return 0;
}
// Negative values all clip to zero for unsigned results.
if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
return 0;
}
// Handle infinities.
if (std::isinf(x)) {
return x < 0 ? std::numeric_limits<IntOut>::min()
: std::numeric_limits<IntOut>::max();
}
// Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
// unless x is zero in which case exp == 0. Note that this implies that the
// magnitude of x is strictly less than 2^exp.
int exp = 0;
std::frexp(x, &exp);
// Let N be the number of non-sign bits in the representation of IntOut. If
// the magnitude of x is strictly less than 2^N, the truncated version of x
// is representable as IntOut. The only representable integer for which this
// is not the case is kMin for signed types (i.e. -2^N), but that is covered
// by the fall-through below.
if (exp <= std::numeric_limits<IntOut>::digits) {
return x;
}
// Handle numbers with magnitude >= 2^N.
return x < 0 ? std::numeric_limits<IntOut>::min()
: std::numeric_limits<IntOut>::max();
}
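// Usage sketch (illustration only; ExampleSafeCast is a hypothetical helper,
// not part of TFLite): unlike a raw static_cast, out-of-range and non-finite
// inputs are well defined and either saturate or collapse to zero.
inline void ExampleSafeCast() {
  TFLITE_CHECK_EQ(SafeCast<int32_t>(3.9), 3);       // Truncates toward zero.
  TFLITE_CHECK_EQ(SafeCast<int8_t>(1000.0f), 127);  // Saturates to the max.
  TFLITE_CHECK_EQ(SafeCast<uint8_t>(-5.0f), 0);     // Negatives clip to zero.
  TFLITE_CHECK_EQ(SafeCast<int32_t>(std::nan("")), 0);  // NaN maps to zero.
}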
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of NEGATIVE its exponent ---
// this is intended as a RIGHT-shift.
//
// Restricted to the case where the multiplier < 1 (and non-negative).
void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift);
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Restricted to the case where the multiplier > 1.
void QuantizeMultiplierGreaterThanOne(double double_multiplier,
int32_t* quantized_multiplier,
int* left_shift);
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Handles an arbitrary positive multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
int* shift);
// Splits a double input value into a returned fraction, and a shift value from
// the exponent, using only bitwise and integer operations to support
// microcontrollers and other environments without floating-point support.
//
// This is designed to be a replacement for how std::frexp() is used within the
// QuantizeMultiplier() function, and so has a different signature than the
// standard version, returning a 64-bit integer rather than a double. This
// result has a maximum value of 1<<31, with the fraction expressed as a
// proportion of that maximum.
//
// std::frexp() returns NaNs and infinities unmodified, but since we're
// returning integers that can't represent those values, instead we return
// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
// result of 0 for NaNs, std::numeric_limits<int64_t>::max() for +INFINITY, and
// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
// result in return values that end up truncating some bits at the end,
// reflecting the loss of precision inherent in denormalization.
int64_t IntegerFrExp(double input, int* shift);
// Converts an integer fraction in the format produced by IntegerFrExp (where
// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
// IEEE binary64 double format result. The implementation uses only integer and
// bitwise operators, so no floating point hardware support or emulation is
// needed. This is here so quantized operations can run non-time-critical
// preparation calculations on microcontrollers and other platforms without
// float support.
double DoubleFromFractionAndShift(int64_t fraction, int shift);
// Performs a multiplication of two numbers in double format, using only integer
// and bitwise instructions. This is aimed at supporting housekeeping functions
// for quantized operations on microcontrollers without floating-point hardware.
double IntegerDoubleMultiply(double a, double b);
// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
// greater than b. It is implemented using only integer and logical instructions
// so that it can be easily run on microcontrollers for quantized operations.
int IntegerDoubleCompare(double a, double b);
// This first creates a multiplier in a double equivalent of
// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
// precision in the double's fractional bits. It then splits the result into
// significand and exponent.
void PreprocessSoftmaxScaling(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier, int* left_shift);
// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
int input_integer_bits,
int32_t* quantized_multiplier,
int* left_shift,
int32_t* reverse_scaling_divisor,
int* reverse_scaling_left_shift);
// Calculate the largest input that will result in a within-bounds intermediate
// result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words,
// it must not overflow before we reduce the value by multiplication by the
// input multiplier. The negative radius is used as the minimum difference in
// Softmax.
int CalculateInputRadius(int input_integer_bits, int input_left_shift,
int total_signed_bits = 31);
// Nudges a min/max quantization range to ensure zero is zero.
// Gymnastics with nudged zero point is to ensure that real zero maps to
// an integer, which is required for e.g. zero-padding in convolutional layers.
// Outputs nudged_min, nudged_max, nudged_scale.
void NudgeQuantizationRange(const float min, const float max,
const int quant_min, const int quant_max,
float* nudged_min, float* nudged_max,
float* nudged_scale);
// Fake quantizes (quantizes and dequantizes) input_data using the scale,
// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
const float nudged_max, const float* input_data,
float* output_data, const float size);
// If x is approximately a power of two (with any positive or negative
// exponent), stores that exponent (i.e. log2(x)) in *log2_result, otherwise
// returns false.
bool CheckedLog2(const float x, int* log2_result);
// Decomposes an array of double multipliers into a Q0.31 int32 representation
// of its significand, and shift representation of its exponent.
//
// Handles an arbitrary multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
int32_t* effective_scale_significand,
int* effective_shift);
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_


@@ -1,400 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#include <algorithm>
#include <type_traits>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
T activation_min, activation_max;
GetActivationParams(params, &activation_min, &activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] + input2_data[i], activation_min, activation_max);
}
}
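// Minimal float usage sketch (illustration only; ExampleFloatAdd is a
// hypothetical helper with made-up values, assuming the initializer-list
// RuntimeShape constructor and the float_activation_min/max fields of
// ArithmeticParams from types.h).
inline void ExampleFloatAdd() {
  const RuntimeShape shape({1, 1, 1, 4});
  const float input1[] = {1.f, 2.f, 3.f, 4.f};
  const float input2[] = {10.f, 20.f, 30.f, 40.f};
  float output[4];
  ArithmeticParams params;
  params.float_activation_min = -100.f;  // Wide enough not to clamp the sums.
  params.float_activation_max = 100.f;
  Add(params, shape, input1, shape, input2, shape, output);
  // output == {11.f, 22.f, 33.f, 44.f}.
}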
// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void AddElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<T>(clamped_output);
}
}
// Scalar-broadcast add that can be used for inner loop of more general
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
uint8_t input1_data, const uint8_t* input2_data,
uint8_t* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
for (int i = 0; i < size; ++i) {
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void AddGeneralParamScale(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int16_t* input1_data,
const RuntimeShape& input2_shape,
const int16_t* input2_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
int max_value = std::numeric_limits<int16_t>::max();
TFLITE_DCHECK_GT(params.input1_offset, -max_value);
TFLITE_DCHECK_GT(params.input2_offset, -max_value);
TFLITE_DCHECK_LT(params.input1_offset, max_value);
TFLITE_DCHECK_LT(params.input2_offset, max_value);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int16_t* output_data,
bool pot_scale = true) {
if (!pot_scale) {
AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
return;
}
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int input1_shift = params.input1_shift;
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
const int16_t output_activation_min = params.quantized_activation_min;
const int16_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
TFLITE_DCHECK_LE(input1_shift, 0);
TFLITE_DCHECK_LE(params.input2_shift, 0);
const int16_t* not_shift_input =
input1_shift == 0 ? input1_data : input2_data;
const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
const int input_right_shift =
input1_shift == 0 ? -params.input2_shift : -input1_shift;
for (int i = 0; i < flat_size; i++) {
// F0 uses 0 integer bits, range [-1, 1].
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
F0 scaled_input = F0::FromRaw(
gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
const int16_t raw_output = result.raw();
const int16_t clamped_output = std::min(
output_activation_max, std::max(output_activation_min, raw_output));
output_data[i] = clamped_output;
}
}
template <typename T>
inline typename std::enable_if<!is_small_integer<T>::value, void>::type
BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
T activation_min, activation_max;
GetActivationParams(params, &activation_min, &activation_max);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
output_data[Offset(extended_output_shape, b, y, x, c)] =
ActivationFunctionWithMinMax<T>(
input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
input2_data[SubscriptToIndex(desc2, b, y, x, c)],
activation_min, activation_max);
}
}
}
}
}
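// Broadcast usage sketch (illustration only; ExampleBroadcastAdd4DSlow is a
// hypothetical helper with made-up values): adding a [1, 1, 1, 1] scalar to a
// [1, 1, 2, 2] tensor broadcasts the single element across every output
// position.
inline void ExampleBroadcastAdd4DSlow() {
  const RuntimeShape shape1({1, 1, 2, 2});
  const RuntimeShape shape2({1, 1, 1, 1});
  const float input1[] = {1.f, 2.f, 3.f, 4.f};
  const float input2[] = {10.f};
  float output[4];
  ArithmeticParams params;
  params.float_activation_min = -100.f;
  params.float_activation_max = 100.f;
  BroadcastAdd4DSlow(params, shape1, input1, shape2, input2, shape1, output);
  // output == {11.f, 12.f, 13.f, 14.f}.
}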
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline typename std::enable_if<is_small_integer<T>::value, void>::type
BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32_t shifted_input1_val =
input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val =
input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier,
params.input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier,
params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<T>(clamped_output);
}
}
}
}
}
inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
const RuntimeShape& unswitched_input1_shape,
const uint8_t* unswitched_input1_data,
const RuntimeShape& unswitched_input2_shape,
const uint8_t* unswitched_input2_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
ArithmeticParams switched_params = unswitched_params;
switched_params.input1_offset = unswitched_params.input2_offset;
switched_params.input1_multiplier = unswitched_params.input2_multiplier;
switched_params.input1_shift = unswitched_params.input2_shift;
switched_params.input2_offset = unswitched_params.input1_offset;
switched_params.input2_multiplier = unswitched_params.input1_multiplier;
switched_params.input2_shift = unswitched_params.input1_shift;
const bool use_unswitched =
unswitched_params.broadcast_category ==
tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
const ArithmeticParams& params =
use_unswitched ? unswitched_params : switched_params;
const uint8_t* input1_data =
use_unswitched ? unswitched_input1_data : unswitched_input2_data;
const uint8_t* input2_data =
use_unswitched ? unswitched_input2_data : unswitched_input1_data;
// Fivefold nested loops. The second input resets its position for each
// iteration of the second loop. The first input resets its position at the
// beginning of the fourth loop. The innermost loop is an elementwise add of
// sections of the arrays.
uint8_t* output_data_ptr = output_data;
const uint8_t* input1_data_ptr = input1_data;
const uint8_t* input2_data_reset = input2_data;
// In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
// between input shapes. y3 for input 1 is always broadcast, and so the
// dimension there is 1, whereas optionally y1 might be broadcast for input 2.
// Put another way,
// input1.shape.FlatSize = y0 * y1 * y2 * y4,
// input2.shape.FlatSize = y0 * y2 * y3 * y4.
int y0 = params.broadcast_shape[0];
int y1 = params.broadcast_shape[1];
int y2 = params.broadcast_shape[2];
int y3 = params.broadcast_shape[3];
int y4 = params.broadcast_shape[4];
if (y4 > 1) {
// General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
// dimension.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {
for (int i3 = 0; i3 < y3; ++i3) {
AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
output_data_ptr);
input2_data_ptr += y4;
output_data_ptr += y4;
}
// We have broadcast y4 of input1 data y3 times, and now move on.
input1_data_ptr += y4;
}
}
// We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
input2_data_reset = input2_data_ptr;
}
} else {
// Special case of y4 == 1, in which the innermost loop is a single element
// and can be combined with the next (y3) as an inner broadcast.
//
// Note that this handles the case of pure scalar broadcast when
// y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
// broadcast with batch (as y2 > 1).
//
// NOTE The process is the same as the above general case except simplified
// for y4 == 1 and the loop over y3 is contained within the
// AddScalarBroadcast function.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {
AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
output_data_ptr);
input2_data_ptr += y3;
output_data_ptr += y3;
input1_data_ptr += 1;
}
}
input2_data_reset = input2_data_ptr;
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_


@@ -1,86 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
// T is expected to be either float or int.
template <typename T>
inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
const T* const* input_data, T* output_data) {
// All inputs and output should have the same shape, this is checked during
// Prepare stage.
const size_t size = input_shape.FlatSize();
for (size_t i = 0; i < size; ++i) {
T x = 0;
for (size_t j = 0; j < num_inputs; ++j) {
x += input_data[j][i];
}
output_data[i] = x;
}
}
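// Usage sketch (illustration only; ExampleAddN is a hypothetical helper with
// made-up values): AddN sums any number of same-shaped inputs element-wise.
inline void ExampleAddN() {
  const RuntimeShape shape({1, 1, 1, 3});
  const float a[] = {1.f, 2.f, 3.f};
  const float b[] = {10.f, 20.f, 30.f};
  const float* const inputs[] = {a, b};
  float output[3];
  AddN(shape, /*num_inputs=*/2, inputs, output);
  // output == {11.f, 22.f, 33.f}.
}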
inline void AddN(const ArithmeticParams& params,
const RuntimeShape& input_shape, const size_t num_inputs,
const int8_t* const* input_data, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is negative input zero point. Activation tensors are
// asymmetric quantized so they span the full int8 range.
// All inputs should have same zero-point and scale, this is checked during
// Prepare stage.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
// All inputs and output should have the same shape, this is checked during
// Prepare stage.
const size_t size = input_shape.FlatSize();
for (size_t i = 0; i < size; ++i) {
// accumulate in scaled_x before clamping to avoid overflow
const int32_t x = params.input1_offset; // x = 0
const int32_t shifted_x = x * (1 << params.left_shift);
int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_x, params.input1_multiplier, params.input1_shift);
for (size_t j = 0; j < num_inputs; ++j) {
const int32_t y = params.input1_offset + input_data[j][i];
const int32_t shifted_y = y * (1 << params.left_shift);
int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_y, params.input1_multiplier, params.input1_shift);
scaled_x += scaled_y;
}
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
scaled_x, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<int8_t>(clamped_output);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_


@@ -1,88 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
#include <functional>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
std::function<bool(T, T)> GetComparefunction(bool is_arg_max) {
if (is_arg_max) {
return std::greater<T>();
} else {
return std::less<T>();
}
}
template <typename T1, typename T2, typename T3, typename Cmp>
void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
const T3* input2_data, const RuntimeShape& output_shape,
T2* output_data, const Cmp& cmp) {
TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
output_shape.DimensionsCount());
int axis = input2_data[0];
if (axis < 0) {
axis += input1_shape.DimensionsCount();
}
const int axis_size = input1_shape.Dims(axis);
int outer_size = 1;
for (int i = 0; i < axis; ++i) {
TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
outer_size *= input1_shape.Dims(i);
}
int inner_size = 1;
const int dims_count = input1_shape.DimensionsCount();
for (int i = axis + 1; i < dims_count; ++i) {
TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
inner_size *= input1_shape.Dims(i);
}
for (int outer = 0; outer < outer_size; ++outer) {
for (int inner = 0; inner < inner_size; ++inner) {
auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
T2 min_max_index = 0;
for (int i = 1; i < axis_size; ++i) {
const auto& curr_value =
input1_data[(outer * axis_size + i) * inner_size + inner];
if (cmp(curr_value, min_max_value)) {
min_max_value = curr_value;
min_max_index = static_cast<T2>(i);
}
}
output_data[outer * inner_size + inner] = min_max_index;
}
}
}
template <typename T1, typename T2, typename T3>
void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
const T3* input2_data, const RuntimeShape& output_shape,
T2* output_data, const bool is_arg_max) {
ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
GetComparefunction<T1>(is_arg_max));
}
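// Usage sketch (illustration only; ExampleArgMax is a hypothetical helper with
// made-up values): taking the argmax of a [1, 4] tensor along axis 1 writes
// one index per outer element.
inline void ExampleArgMax() {
  const RuntimeShape input_shape({1, 4});
  const float input[] = {0.1f, 0.9f, 0.3f, 0.5f};
  const int32_t axis[] = {1};
  const RuntimeShape output_shape({1});
  int32_t output[1];
  ArgMinMax(input_shape, input, axis, output_shape, output,
            /*is_arg_max=*/true);
  // output[0] == 1, the position of the largest value along axis 1.
}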
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_


@@ -1,275 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
namespace batch_matmul {
// Determine which dimension is the broadcast dimension.
inline int broadcast_dim(int lhs_dim, int rhs_dim) {
if (lhs_dim == rhs_dim) return lhs_dim;
if (lhs_dim == 1) return rhs_dim;
TFLITE_DCHECK_EQ(rhs_dim, 1);
return lhs_dim;
}
// Compute the "extent" for iterating on this dimension.
// If we are broadcasting, then don't advance (i.e. return 0).
inline int extent(const RuntimeShape& shape, int x) {
if (shape.Dims(x) == 1) {
return 0;
}
int prod = 1;
for (int i = x + 1; i < shape.DimensionsCount(); ++i) {
prod *= shape.Dims(i);
}
return prod;
}
} // namespace batch_matmul
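// Illustration only (ExampleBatchMatMulExtent is a hypothetical helper,
// assuming the initializer-list RuntimeShape constructor): for a rank-5 shape
// {2, 1, 3, 4, 5}, dimension 1 is broadcast and therefore has extent 0, while
// dimension 0 advances by the product of the trailing dimensions,
// 1 * 3 * 4 * 5 == 60.
inline void ExampleBatchMatMulExtent() {
  const RuntimeShape shape({2, 1, 3, 4, 5});
  TFLITE_DCHECK_EQ(batch_matmul::extent(shape, 1), 0);
  TFLITE_DCHECK_EQ(batch_matmul::extent(shape, 0), 60);
}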
template <typename Ta, typename Tb, typename Tout>
inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data,
const RuntimeShape& rhs_shape, const Tb* rhs_data,
const RuntimeShape& output_shape, Tout* output_data) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const Ta* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const Tb* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const Ta* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const Tb* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const Ta* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const Tb* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
Tout* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
for (int i = 0; i < lhs_rows; ++i) {
Tout total = 0;
for (int k = 0; k < accum_depth; ++k) {
total += static_cast<Tout>(lhs_ptr2[accum_depth * i + k]) *
static_cast<Tout>(rhs_ptr2[j * accum_depth + k]);
}
int idx = lhs_rows * j + i;
out_ptr[idx] = total;
}
}
}
}
}
}
inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data,
const RuntimeShape& rhs_shape, const int8_t* rhs_data,
const float* scaling_factors,
const int32_t* input_offset, int32_t* row_sums,
const RuntimeShape& output_shape, float* output_data,
bool* compute_row_sums) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols;
const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols;
const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols;
const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows;
const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows;
const int woff_ext2 = lhs_ext2 == 0 ? 0 : lhs_rows;
if (!compute_row_sums || *compute_row_sums) {
int num_weights_matrices = 1;
for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) {
num_weights_matrices *= extended_lhs_shape.Dims(i);
}
tensor_utils::ReductionSumVector(
lhs_data, row_sums, num_weights_matrices * lhs_rows, accum_depth);
if (compute_row_sums) {
*compute_row_sums = false;
}
}
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0);
const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0);
const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1);
const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1);
const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1);
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2);
const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2);
const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2);
float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
const float batch_scaling_factor = scale_ptr2[j];
const float batch_offset = static_cast<float>(ioff_ptr2[j]);
for (int i = 0; i < lhs_rows; ++i) {
int32_t total = 0;
for (int k = 0; k < accum_depth; ++k) {
total +=
lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k];
}
int32_t row_sum = woff_ptr2[i];
total -= row_sum * batch_offset;
int idx = lhs_rows * j + i;
out_ptr[idx] += batch_scaling_factor * total;
}
}
}
}
}
}
template <typename T, typename AccumT>
inline void BatchMatMul(const FullyConnectedParams& params,
const RuntimeShape& lhs_shape, const T* lhs_data,
const RuntimeShape& rhs_shape, const T* rhs_data,
const RuntimeShape& output_shape, T* output_data) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const T* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
T* out_ptr = output_data +
((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
for (int i = 0; i < lhs_rows; ++i) {
AccumT total = 0;
for (int k = 0; k < accum_depth; ++k) {
AccumT lhs_val = lhs_ptr2[accum_depth * i + k];
AccumT rhs_val = rhs_ptr2[accum_depth * j + k];
total += (lhs_val + filter_offset) * (rhs_val + input_offset);
}
int32_t total_scaled = MultiplyByQuantizedMultiplier(
total, output_multiplier, output_shift);
total_scaled += output_offset;
total_scaled = std::max(total_scaled, output_activation_min);
total_scaled = std::min(total_scaled, output_activation_max);
const int idx = lhs_rows * j + i;
out_ptr[idx] = static_cast<T>(total_scaled);
}
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
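Both kernels above reconcile the three leading batch dimensions with batch_matmul::broadcast_dim and advance the operand pointers by batch_matmul::extent, which collapses to zero for a broadcast (size-1) dimension. Below is a minimal standalone sketch of that bookkeeping, using hypothetical BroadcastDim/Extent helpers over plain 5-element arrays rather than RuntimeShape.

#include <cassert>
#include <cstdio>

// Broadcast size of one batch dimension: the sizes must match or one must be 1.
static int BroadcastDim(int lhs_dim, int rhs_dim) {
  if (lhs_dim == rhs_dim) return lhs_dim;
  assert(lhs_dim == 1 || rhs_dim == 1);
  return lhs_dim == 1 ? rhs_dim : lhs_dim;
}

// Stride (in elements) contributed by dimension x of a rank-5 shape, or 0 if
// that dimension is broadcast (size 1), mirroring batch_matmul::extent.
static int Extent(const int shape[5], int x) {
  if (shape[x] == 1) return 0;
  int prod = 1;
  for (int i = x + 1; i < 5; ++i) prod *= shape[i];
  return prod;
}

int main() {
  const int lhs[5] = {2, 1, 1, 3, 4};  // [..., lhs_rows = 3, accum_depth = 4]
  const int rhs[5] = {1, 5, 1, 4, 6};  // broadcast against lhs on dims 0..2
  for (int d = 0; d < 3; ++d) {
    std::printf("batch dim %d: %d (lhs stride %d, rhs stride %d)\n", d,
                BroadcastDim(lhs[d], rhs[d]), Extent(lhs, d), Extent(rhs, d));
  }
  // Prints batch sizes 2, 5, 1 -- the b0/b1/b2 loop bounds in the kernels above.
  return 0;
}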

View File

@@ -1,101 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
#include <cmath>
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// TODO(b/135760455): Move this method to an anonymous namespace in a .cc file.
inline RuntimeShape ExtendShapeBatchToSpace(const RuntimeShape& shape) {
if (shape.DimensionsCount() == 4) {
return shape;
}
RuntimeShape new_shape(4, 1);
new_shape.SetDim(0, shape.Dims(0));
new_shape.SetDim(1, shape.Dims(1));
new_shape.SetDim(3, shape.Dims(2));
return new_shape;
}
template <typename T>
inline void BatchToSpaceND(const RuntimeShape& unextended_input1_shape,
const T* input1_data,
const RuntimeShape& unextended_input2_shape,
const int32_t* block_shape_data,
const RuntimeShape& unextended_input3_shape,
const int32_t* crops_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
ruy::profiler::ScopeLabel label("BatchToSpaceND");
TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3);
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(),
unextended_output_shape.DimensionsCount());
const RuntimeShape input1_shape =
ExtendShapeBatchToSpace(unextended_input1_shape);
const RuntimeShape output_shape =
ExtendShapeBatchToSpace(unextended_output_shape);
const int output_width = output_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_batch_size = output_shape.Dims(0);
const int depth = input1_shape.Dims(3);
const int input_width = input1_shape.Dims(2);
const int input_height = input1_shape.Dims(1);
const int input_batch_size = input1_shape.Dims(0);
const int block_shape_height = block_shape_data[0];
const int block_shape_width =
unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1;
const int crops_top = crops_data[0];
const int crops_left =
unextended_input1_shape.DimensionsCount() == 4 ? crops_data[2] : 0;
for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) {
const int out_batch = in_batch % output_batch_size;
const int spatial_offset = in_batch / output_batch_size;
for (int in_h = 0; in_h < input_height; ++in_h) {
const int out_h = in_h * block_shape_height +
spatial_offset / block_shape_width - crops_top;
if (out_h < 0 || out_h >= output_height) {
continue;
}
for (int in_w = 0; in_w < input_width; ++in_w) {
const int out_w = in_w * block_shape_width +
spatial_offset % block_shape_width - crops_left;
if (out_w < 0 || out_w >= output_width) {
continue;
}
T* out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
const T* in =
input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0);
memcpy(out, in, depth * sizeof(T));
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
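A minimal usage sketch of BatchToSpaceND, assuming the tensorflow/lite and third-party include paths are available; shapes and values are illustrative. Four single-pixel batches with a 2x2 block and no cropping fold back into one 2x2 image.

#include <cstdint>
#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"

int main() {
  const tflite::RuntimeShape input_shape({4, 1, 1, 1});
  const float input[4] = {1.f, 2.f, 3.f, 4.f};
  const tflite::RuntimeShape block_shape({2});
  const int32_t block[2] = {2, 2};
  const tflite::RuntimeShape crops_shape({2, 2});
  const int32_t crops[4] = {0, 0, 0, 0};  // no cropping
  const tflite::RuntimeShape output_shape({1, 2, 2, 1});
  float output[4] = {};
  tflite::reference_ops::BatchToSpaceND(input_shape, input, block_shape, block,
                                        crops_shape, crops, output_shape,
                                        output);
  for (float v : output) std::printf("%.0f ", v);  // 1 2 3 4
  return 0;
}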

View File

@@ -1,91 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Also appears to duplicate MinimumMaximum.
//
// R: Result type. T1: Input 1 type. T2: Input 2 type.
template <typename R, typename T1, typename T2>
inline void BroadcastBinaryFunction4DSlow(
const RuntimeShape& unextended_input1_shape, const T1* input1_data,
const RuntimeShape& unextended_input2_shape, const T2* input2_data,
const RuntimeShape& unextended_output_shape, R* output_data,
R (*func)(T1, T2)) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
const int* dims_data =
reinterpret_cast<const int*>(output_shape.DimsDataUpTo5D());
for (int b = 0; b < output_shape.Dims(0); ++b) {
int out_idx_b = b * dims_data[1];
int in_idx1_b = desc1.strides[0] * b;
int in_idx2_b = desc2.strides[0] * b;
for (int y = 0; y < output_shape.Dims(1); ++y) {
int out_idx_y = (out_idx_b + y) * dims_data[2];
int in_idx1_y = in_idx1_b + desc1.strides[1] * y;
int in_idx2_y = in_idx2_b + desc2.strides[1] * y;
for (int x = 0; x < output_shape.Dims(2); ++x) {
int out_idx_x = (out_idx_y + x) * dims_data[3];
int in1_idx = in_idx1_y + desc1.strides[2] * x;
int in2_idx = in_idx2_y + desc2.strides[2] * x;
for (int c = 0; c < output_shape.Dims(3); ++c) {
auto out_idx = out_idx_x + c;
auto in1_val = input1_data[in1_idx];
auto in2_val = input2_data[in2_idx];
output_data[out_idx] = func(in1_val, in2_val);
in1_idx += desc1.strides[3];
in2_idx += desc2.strides[3];
}
}
}
}
}
// R: Result type. T1: Input 1 type. T2: Input 2 type.
template <typename R, typename T1, typename T2>
inline void BinaryFunction(const RuntimeShape& input1_shape,
const T1* input1_data,
const RuntimeShape& input2_shape,
const T2* input2_data,
const RuntimeShape& output_shape, R* output_data,
R (*func)(T1, T2)) {
const int flat_size =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = func(input1_data[i], input2_data[i]);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
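A minimal usage sketch of BinaryFunction with a user-supplied element-wise function (the SquaredDifference helper here is hypothetical), assuming the tensorflow/lite include paths are available.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"

// Plain function whose pointer is passed to BinaryFunction; the template
// parameters R, T1 and T2 are deduced from it.
static float SquaredDifference(float a, float b) { return (a - b) * (a - b); }

int main() {
  const tflite::RuntimeShape shape({1, 4});
  const float in1[4] = {1.f, 2.f, 3.f, 4.f};
  const float in2[4] = {4.f, 3.f, 2.f, 1.f};
  float out[4] = {};
  tflite::reference_ops::BinaryFunction(shape, in1, shape, in2, shape, out,
                                        SquaredDifference);
  for (float v : out) std::printf("%.0f ", v);  // 9 1 1 9
  return 0;
}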

View File

@@ -1,56 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
// Gets data at the backward index i of the shape tensor. Returns 1 if the
// index is out of range.
auto get_shape_data = [](const RuntimeShape& shape, const T* data,
int backward_idx) -> T {
int forward_idx = shape.FlatSize() - 1 - backward_idx;
if (forward_idx < 0) return 1;
return data[forward_idx];
};
int output_num_elements = output_shape.FlatSize();
for (int i = 0; i < output_num_elements; ++i) {
int backward_i = output_num_elements - 1 - i;
int shape1_i = get_shape_data(input1_shape, input1_data, i);
int shape2_i = get_shape_data(input2_shape, input2_data, i);
if (shape1_i == 1) {
output_data[backward_i] = shape2_i;
} else if (shape2_i == 1) {
output_data[backward_i] = shape1_i;
} else {
TFLITE_CHECK_EQ(shape1_i, shape2_i);
output_data[backward_i] = shape1_i;
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
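A minimal usage sketch of BroadcastArgs, assuming the tensorflow/lite include paths are available: the op consumes two shape tensors and emits their broadcast shape, here [2, 1, 3] against [4, 2, 5, 3].

#include <cstdint>
#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/broadcast_args.h"

int main() {
  const tflite::RuntimeShape shape1_shape({3});
  const tflite::RuntimeShape shape2_shape({4});
  const int32_t shape1[3] = {2, 1, 3};
  const int32_t shape2[4] = {4, 2, 5, 3};
  const tflite::RuntimeShape output_shape({4});
  int32_t output[4] = {};
  tflite::reference_ops::BroadcastArgs(shape1_shape, shape1, shape2_shape,
                                       shape2, output_shape, output);
  for (int32_t d : output) std::printf("%d ", static_cast<int>(d));  // 4 2 5 3
  return 0;
}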

View File

@@ -1,97 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace reference_ops {
template <int N>
void BroadcastImpl(const NdArrayDesc<N>& input_desc, const char* input_data,
const NdArrayDesc<N>& output_desc, char* output_data,
int indexes[N], int dim, const int last_broadcasting_dim,
const int type_size) {
// Copy data from input to output.
if (dim == last_broadcasting_dim) {
int copy_size = output_desc.strides[dim] * type_size;
const char* data_src =
input_data + SubscriptToIndex(input_desc, indexes) * type_size;
char* data_dst =
output_data + SubscriptToIndex(output_desc, indexes) * type_size;
for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
memcpy(data_dst, data_src, copy_size);
}
return;
}
  // Recurse to find the next broadcast dimension.
for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim];
++indexes[dim]) {
BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes,
dim + 1, last_broadcasting_dim, type_size);
}
// Duplicate data in output tensor.
indexes[dim] = 0;
if (input_desc.extents[dim] != output_desc.extents[dim]) {
int copy_size = output_desc.strides[dim] * type_size;
char* data_src =
output_data + SubscriptToIndex(output_desc, indexes) * type_size;
char* data_dst = data_src + copy_size;
for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
memcpy(data_dst, data_src, copy_size);
}
}
}
template <int N>
inline void BroadcastTo(const RuntimeShape& unextended_input_shape,
const char* input_data,
const RuntimeShape& unextended_output_shape,
char* output_data, TfLiteType data_type) {
NdArrayDesc<N> input_desc;
NdArrayDesc<N> output_desc;
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape),
&input_desc);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
  // Find the last dimension that is broadcast. At this dimension, the data is
  // copied from the input tensor to the output tensor.
int last_broadcast_dim = -1;
for (int i = N - 1; i >= 0; --i) {
if (input_desc.extents[i] != output_desc.extents[i]) {
last_broadcast_dim = i;
break;
}
}
// If non-broadcasting, just copy data from input to output tensor.
if (last_broadcast_dim == -1) {
memcpy(output_data, input_data,
unextended_input_shape.FlatSize() * TfLiteTypeGetSize(data_type));
return;
}
// Broadcasting using memcpy.
int indexes[N] = {0};
BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes, 0,
last_broadcast_dim, TfLiteTypeGetSize(data_type));
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
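A minimal usage sketch of BroadcastTo, assuming the tensorflow/lite include paths and the kernel_util implementation (for TfLiteTypeGetSize) are available. N is the maximum rank the instantiation handles; 4 is enough to broadcast a [1, 3] float tensor to [2, 3].

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h"

int main() {
  const tflite::RuntimeShape input_shape({1, 3});
  const float input[3] = {1.f, 2.f, 3.f};
  const tflite::RuntimeShape output_shape({2, 3});
  float output[6] = {};
  // The op works on raw bytes, so the buffers are passed as char* plus a
  // TfLiteType that supplies the element size.
  tflite::reference_ops::BroadcastTo<4>(
      input_shape, reinterpret_cast<const char*>(input), output_shape,
      reinterpret_cast<char*>(output), kTfLiteFloat32);
  for (float v : output) std::printf("%.0f ", v);  // 1 2 3 1 2 3
  return 0;
}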

View File

@@ -1,37 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
#include <cmath>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = std::ceil(input_data[i]);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
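A minimal usage sketch of Ceil over a small float tensor, assuming the tensorflow/lite include paths are available.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/ceil.h"

int main() {
  const tflite::RuntimeShape shape({1, 4});
  const float input[4] = {-1.5f, 0.2f, 3.f, 7.9f};
  float output[4] = {};
  tflite::reference_ops::Ceil(shape, input, shape, output);
  for (float v : output) std::printf("%.0f ", v);  // -1 1 3 8
  return 0;
}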

View File

@@ -1,280 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline bool EqualFn(T lhs, T rhs) {
return lhs == rhs;
}
template <typename T>
inline bool NotEqualFn(T lhs, T rhs) {
return lhs != rhs;
}
template <typename T>
inline bool GreaterFn(T lhs, T rhs) {
return lhs > rhs;
}
template <typename T>
inline bool GreaterEqualFn(T lhs, T rhs) {
return lhs >= rhs;
}
template <typename T>
inline bool LessFn(T lhs, T rhs) {
return lhs < rhs;
}
template <typename T>
inline bool LessEqualFn(T lhs, T rhs) {
return lhs <= rhs;
}
template <typename T>
using ComparisonFn = bool (*)(T, T);
template <typename T, ComparisonFn<T> F>
inline void ComparisonImpl(
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
output_data[i] = F(input1_data[i], input2_data[i]);
}
}
template <ComparisonFn<float> F>
inline void Comparison(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
const float* input1_data,
const RuntimeShape& input2_shape,
const float* input2_data,
const RuntimeShape& output_shape, bool* output_data) {
ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
}
template <typename T, ComparisonFn<int32_t> F>
inline void ComparisonWithScaling(
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
int left_shift = op_params.left_shift;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const int32_t input1_val = input1_offset + input1_data[i];
const int32_t input2_val = input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[i] = F(scaled_input1_val, scaled_input2_val);
}
}
struct BroadcastComparison4DSlowCommon {
const RuntimeShape output_shape;
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
};
inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
const RuntimeShape& unextended_input1_shape,
const RuntimeShape& unextended_input2_shape,
const RuntimeShape& unextended_output_shape) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1,
desc2};
}
template <typename T, ComparisonFn<T> F>
inline void BroadcastComparison4DSlowImpl(
const ComparisonParams& op_params,
const RuntimeShape& unextended_input1_shape, const T* input1_data,
const RuntimeShape& unextended_input2_shape, const T* input2_data,
const RuntimeShape& unextended_output_shape, bool* output_data) {
const BroadcastComparison4DSlowCommon dims =
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
unextended_input2_shape,
unextended_output_shape);
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
output_data[Offset(dims.output_shape, b, y, x, c)] =
F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
}
}
}
}
}
template <ComparisonFn<float> F>
inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
const float* input1_data,
const RuntimeShape& input2_shape,
const float* input2_data,
const RuntimeShape& output_shape,
bool* output_data) {
BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
input2_shape, input2_data,
output_shape, output_data);
}
template <typename T, ComparisonFn<int32_t> F>
inline void BroadcastComparison4DSlowWithScaling(
const ComparisonParams& op_params,
const RuntimeShape& unextended_input1_shape, const T* input1_data,
const RuntimeShape& unextended_input2_shape, const T* input2_data,
const RuntimeShape& unextended_output_shape, bool* output_data) {
const BroadcastComparison4DSlowCommon dims =
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
unextended_input2_shape,
unextended_output_shape);
int left_shift = op_params.left_shift;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const int32_t input1_val =
input1_offset +
input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
const int32_t input2_val =
input2_offset +
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[Offset(dims.output_shape, b, y, x, c)] =
F(scaled_input1_val, scaled_input2_val);
}
}
}
}
}
#define TFLITE_COMPARISON_OP(name) \
inline void name(const ComparisonParams& op_params, \
const RuntimeShape& input1_shape, const float* input1_data, \
const RuntimeShape& input2_shape, const float* input2_data, \
const RuntimeShape& output_shape, bool* output_data) { \
Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape, \
input2_data, output_shape, output_data); \
} \
template <typename T> \
inline void name##NoScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data, \
input2_shape, input2_data, output_shape, \
output_data); \
} \
template <typename T> \
inline void name##WithScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data, \
input2_shape, input2_data, \
output_shape, output_data); \
} \
template <typename T> \
inline void Broadcast4DSlow##name##NoScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
BroadcastComparison4DSlowImpl<T, name##Fn>( \
op_params, input1_shape, input1_data, input2_shape, input2_data, \
output_shape, output_data); \
} \
inline void Broadcast4DSlow##name( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const float* input1_data, const RuntimeShape& input2_shape, \
const float* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data, \
input2_shape, input2_data, \
output_shape, output_data); \
} \
template <typename T> \
inline void Broadcast4DSlow##name##WithScaling( \
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
const T* input1_data, const RuntimeShape& input2_shape, \
const T* input2_data, const RuntimeShape& output_shape, \
bool* output_data) { \
BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
op_params, input1_shape, input1_data, input2_shape, input2_data, \
output_shape, output_data); \
}
TFLITE_COMPARISON_OP(Equal);
TFLITE_COMPARISON_OP(NotEqual);
TFLITE_COMPARISON_OP(Greater);
TFLITE_COMPARISON_OP(GreaterEqual);
TFLITE_COMPARISON_OP(Less);
TFLITE_COMPARISON_OP(LessEqual);
#undef TFLITE_COMPARISON_OP
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
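The TFLITE_COMPARISON_OP macro above expands into the Equal/NotEqual/Greater/GreaterEqual/Less/LessEqual families (plain float, NoScaling, WithScaling, and the Broadcast4DSlow variants). A minimal usage sketch of the NoScaling form on int32 data follows, assuming the tensorflow/lite include paths are available; ComparisonParams is only read by the scaled variants, so a zero-initialized struct is enough here.

#include <cstdint>
#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/comparisons.h"

int main() {
  const tflite::RuntimeShape shape({1, 4});
  const int32_t in1[4] = {1, 5, 3, 7};
  const int32_t in2[4] = {2, 4, 3, 6};
  bool out[4] = {};
  tflite::ComparisonParams params = {};  // unused on the NoScaling path
  tflite::reference_ops::GreaterNoScaling(params, shape, in1, shape, in2,
                                          shape, out);
  for (bool v : out) std::printf("%d ", v ? 1 : 0);  // 0 1 0 1
  return 0;
}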

View File

@@ -1,141 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename Scalar>
inline void Concatenation(const ConcatenationParams& params,
const RuntimeShape* const* input_shapes,
const Scalar* const* input_data,
const RuntimeShape& output_shape,
Scalar* output_data) {
int axis = params.axis;
int inputs_count = params.inputs_count;
const int concat_dimensions = output_shape.DimensionsCount();
TFLITE_DCHECK_LT(axis, concat_dimensions);
int64_t concat_size = 0;
for (int i = 0; i < inputs_count; i++) {
TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
for (int j = 0; j < concat_dimensions; j++) {
if (j != axis) {
MatchingDim(*input_shapes[i], j, output_shape, j);
}
}
concat_size += input_shapes[i]->Dims(axis);
}
TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
int64_t outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= output_shape.Dims(i);
}
// For all input arrays,
// FlatSize() = outer_size * Dims(axis) * base_inner_size;
int64_t base_inner_size = 1;
for (int i = axis + 1; i < concat_dimensions; ++i) {
base_inner_size *= output_shape.Dims(i);
}
Scalar* output_ptr = output_data;
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
const Scalar* input_ptr = input_data[i] + k * copy_size;
memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
output_ptr += copy_size;
}
}
}
// TODO(b/174275780): The quantized implementation of concatenation isn't fully
// quantized as it takes scale as a floating point value. This should be fixed
// when optimizing this routine further.
inline void ConcatenationWithScaling(const ConcatenationParams& params,
const RuntimeShape* const* input_shapes,
const uint8_t* const* input_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
int axis = params.axis;
const int32_t* input_zeropoint = params.input_zeropoint;
const float* input_scale = params.input_scale;
int inputs_count = params.inputs_count;
const int32_t output_zeropoint = params.output_zeropoint;
const float output_scale = params.output_scale;
const int concat_dimensions = output_shape.DimensionsCount();
TFLITE_DCHECK_LT(axis, concat_dimensions);
int64_t concat_size = 0;
for (int i = 0; i < inputs_count; i++) {
TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
for (int j = 0; j < concat_dimensions; j++) {
if (j != axis) {
MatchingDim(*input_shapes[i], j, output_shape, j);
}
}
concat_size += input_shapes[i]->Dims(axis);
}
TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
int64_t outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= output_shape.Dims(i);
}
// For all input arrays,
// FlatSize() = outer_size * Dims(axis) * base_inner_size;
int64_t base_inner_size = 1;
for (int i = axis + 1; i < concat_dimensions; ++i) {
base_inner_size *= output_shape.Dims(i);
}
const float inverse_output_scale = 1.f / output_scale;
uint8_t* output_ptr = output_data;
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
const uint8_t* input_ptr = input_data[i] + k * copy_size;
if (input_zeropoint[i] == output_zeropoint &&
input_scale[i] == output_scale) {
memcpy(output_ptr, input_ptr, copy_size);
} else {
const float scale = input_scale[i] * inverse_output_scale;
const float bias = -input_zeropoint[i] * scale;
for (int j = 0; j < copy_size; ++j) {
const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
input_ptr[j] * scale + bias)) +
output_zeropoint;
output_ptr[j] = static_cast<uint8_t>(
std::max<int32_t>(std::min<int32_t>(255, value), 0));
}
}
output_ptr += copy_size;
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
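A minimal usage sketch of the float Concatenation path, assuming the tensorflow/lite include paths are available: a [1, 2] and a [1, 3] tensor joined along axis 1. Only axis and inputs_count of ConcatenationParams matter here; the zero-point/scale fields are read only by ConcatenationWithScaling.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/concatenation.h"

int main() {
  const tflite::RuntimeShape in0_shape({1, 2});
  const tflite::RuntimeShape in1_shape({1, 3});
  const tflite::RuntimeShape* const input_shapes[2] = {&in0_shape, &in1_shape};
  const float in0[2] = {1.f, 2.f};
  const float in1[3] = {3.f, 4.f, 5.f};
  const float* const input_data[2] = {in0, in1};
  const tflite::RuntimeShape output_shape({1, 5});
  float output[5] = {};
  tflite::ConcatenationParams params = {};
  params.axis = 1;
  params.inputs_count = 2;
  tflite::reference_ops::Concatenation(params, input_shapes, input_data,
                                       output_shape, output);
  for (float v : output) std::printf("%.0f ", v);  // 1 2 3 4 5
  return 0;
}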

View File

@@ -1,287 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& filter_shape,
const float* filter_data, const RuntimeShape& bias_shape,
const float* bias_data, const RuntimeShape& output_shape,
float* output_data, const RuntimeShape& im2col_shape,
float* im2col_data) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
const int in_y_origin = (out_y * stride_height) - pad_height;
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
float total = 0.f;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (!is_point_inside_image) {
continue;
}
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
float input_value =
input_data[Offset(input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
float filter_value = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
total += (input_value * filter_value);
}
}
}
float bias_value = 0.0f;
if (bias_data) {
bias_value = bias_data[out_channel];
}
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
ActivationFunctionWithMinMax(total + bias_value,
output_activation_min,
output_activation_max);
}
}
}
}
}
inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data, const RuntimeShape& im2col_shape,
uint8_t* im2col_data, void* cpu_backend_context) {
(void)cpu_backend_context; // only used in optimized code.
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
const int in_y_origin = (out_y * stride_height) - pad_height;
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (!is_point_inside_image) {
continue;
}
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
int32_t input_val =
input_data[Offset(input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
int32_t filter_val = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
acc +=
(filter_val + filter_offset) * (input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[out_channel];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<uint8_t>(acc);
}
}
}
}
}
inline void HybridConvPerChannel(
const ConvParams& params, float* scaling_factors_ptr,
const RuntimeShape& input_shape, const int8_t* input_data,
const RuntimeShape& filter_shape, const int8_t* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const RuntimeShape& im2col_shape, int8_t* im2col_data,
const float* per_channel_scale, int32_t* input_offset) {
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc += filter_val * (input_val - input_offset[batch]);
}
}
}
}
float acc_float =
acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
if (bias_data) {
acc_float += bias_data[out_channel];
}
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
ActivationFunctionWithMinMax(acc_float, output_activation_min,
output_activation_max);
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
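A minimal usage sketch of the float Conv overload, assuming the tensorflow/lite include paths are available: one 2x2 all-ones filter slid over a 3x3 single-channel image with stride 1 and no padding. The quantized fields of ConvParams stay zero because the float path never reads them, and the im2col arguments are unused by the reference kernel.

#include <cstdio>
#include <limits>
#include "tensorflow/lite/kernels/internal/reference/conv.h"

int main() {
  tflite::ConvParams params = {};
  params.stride_width = 1;
  params.stride_height = 1;
  params.dilation_width_factor = 1;
  params.dilation_height_factor = 1;
  params.padding_values.width = 0;
  params.padding_values.height = 0;
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();

  const tflite::RuntimeShape input_shape({1, 3, 3, 1});   // NHWC
  const float input[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  const tflite::RuntimeShape filter_shape({1, 2, 2, 1});  // [out_ch, h, w, in_ch]
  const float filter[4] = {1, 1, 1, 1};
  const tflite::RuntimeShape bias_shape({1});
  const float bias[1] = {0.f};
  const tflite::RuntimeShape output_shape({1, 2, 2, 1});
  float output[4] = {};

  tflite::reference_ops::Conv(params, input_shape, input, filter_shape, filter,
                              bias_shape, bias, output_shape, output,
                              tflite::RuntimeShape(), /*im2col_data=*/nullptr);
  for (float v : output) std::printf("%.0f ", v);  // 12 16 24 28
  return 0;
}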

View File

@@ -1,175 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
#include <algorithm>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
bool exclusive, bool reverse, T* output_data) {
const int32_t rank = shape.DimensionsCount();
TFLITE_DCHECK_GE(rank, 1);
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, rank);
size_t inner = 1;
size_t outer = 1;
size_t depth = 1;
for (int32_t i = 0; i < rank; i++) {
if (i < axis)
inner *= shape.Dims(i);
else if (i > axis)
outer *= shape.Dims(i);
else
depth = shape.Dims(i);
}
for (size_t outer_index = 0; outer_index < outer; outer_index++) {
size_t outer_index_adj;
if (reverse)
outer_index_adj = (outer - 1) - outer_index;
else
outer_index_adj = outer_index;
for (size_t inner_index = 0; inner_index < inner; inner_index++) {
T accumulator = 0;
size_t inner_index_adj;
if (reverse)
inner_index_adj = (inner - 1) - inner_index;
else
inner_index_adj = inner_index;
for (size_t depth_index = 0; depth_index < depth; depth_index++) {
size_t depth_index_adj;
if (reverse)
depth_index_adj = (depth - 1) - depth_index;
else
depth_index_adj = depth_index;
size_t index = outer_index_adj;
index += inner_index_adj * depth * outer;
index += depth_index_adj * outer;
if (exclusive) {
output_data[index] = accumulator;
accumulator += input_data[index];
} else {
accumulator += input_data[index];
output_data[index] = accumulator;
}
}
}
}
}
//
// Quantized INT8 CUMSUM
//
inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
const RuntimeShape& shape, int32_t axis, bool exclusive,
bool reverse, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
  // The input offset is the negated input zero point. Activation tensors are
  // asymmetrically quantized, so they span the full int8 range.
  // All inputs should have the same zero point and scale; this is checked
  // during the Prepare stage.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
const int32_t rank = shape.DimensionsCount();
TFLITE_DCHECK_GE(rank, 1);
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, rank);
size_t inner = 1;
size_t outer = 1;
size_t depth = 1;
for (int32_t i = 0; i < rank; i++) {
if (i < axis)
inner *= shape.Dims(i);
else if (i > axis)
outer *= shape.Dims(i);
else
depth = shape.Dims(i);
}
for (size_t outer_index = 0; outer_index < outer; outer_index++) {
size_t outer_index_adj;
if (reverse)
outer_index_adj = (outer - 1) - outer_index;
else
outer_index_adj = outer_index;
for (size_t inner_index = 0; inner_index < inner; inner_index++) {
int32_t accumulator = params.input1_offset; // accumulator = 0
accumulator *= (1 << params.left_shift);
accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
accumulator, params.input1_multiplier, params.input1_shift);
size_t inner_index_adj;
if (reverse)
inner_index_adj = (inner - 1) - inner_index;
else
inner_index_adj = inner_index;
for (size_t depth_index = 0; depth_index < depth; depth_index++) {
size_t depth_index_adj;
if (reverse)
depth_index_adj = (depth - 1) - depth_index;
else
depth_index_adj = depth_index;
size_t index = outer_index_adj;
index += inner_index_adj * depth * outer;
index += depth_index_adj * outer;
const int32_t y = params.input1_offset + input_data[index];
const int32_t shifted_y = y * (1 << params.left_shift);
const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_y, params.input1_multiplier, params.input1_shift);
int32_t scaled_output;
if (exclusive) {
scaled_output = accumulator;
accumulator += scaled_y;
} else {
accumulator += scaled_y;
scaled_output = accumulator;
}
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
scaled_output, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[index] = static_cast<int8_t>(clamped_output);
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
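A minimal usage sketch of the float CumSum template over a [2, 3] tensor, accumulating along axis 1 (the quantized overload above additionally rescales through ArithmeticParams), assuming the tensorflow/lite include paths are available.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/cumsum.h"

int main() {
  const tflite::RuntimeShape shape({2, 3});
  const float input[6] = {1, 2, 3, 4, 5, 6};
  float output[6] = {};
  tflite::reference_ops::CumSum(input, shape, /*axis=*/1, /*exclusive=*/false,
                                /*reverse=*/false, output);
  for (float v : output) std::printf("%.0f ", v);  // 1 3 6  4 9 15
  return 0;
}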

View File

@@ -1,79 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int input_depth = input_shape.Dims(3);
const int input_width = input_shape.Dims(2);
const int input_height = input_shape.Dims(1);
const int input_batch = input_shape.Dims(0);
const int output_depth = output_shape.Dims(3);
const int output_width = output_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_batch = output_shape.Dims(0);
const int32_t block_size = op_params.block_size;
TFLITE_DCHECK_EQ(input_width * block_size, output_width);
TFLITE_DCHECK_EQ(input_height * block_size, output_height);
TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size);
TFLITE_DCHECK_EQ(input_batch, output_batch);
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_h = 0; out_h < output_height; ++out_h) {
for (int out_w = 0; out_w < output_width; ++out_w) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
const int in_d =
out_d + ((out_h % block_size) * block_size + out_w % block_size) *
output_depth;
const int in_w = out_w / block_size;
const int in_h = out_h / block_size;
const int in_b = out_b;
const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
const int output_index =
Offset(output_shape, out_b, out_h, out_w, out_d);
output_data[output_index] = input_data[input_index];
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
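A minimal usage sketch of DepthToSpace with block_size = 2, assuming the tensorflow/lite include paths are available: a depth of 4 at one spatial position is rearranged into a 2x2 spatial block of depth 1.

#include <cstdio>
#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"

int main() {
  tflite::DepthToSpaceParams params = {};
  params.block_size = 2;
  const tflite::RuntimeShape input_shape({1, 1, 1, 4});
  const float input[4] = {1.f, 2.f, 3.f, 4.f};
  const tflite::RuntimeShape output_shape({1, 2, 2, 1});
  float output[4] = {};
  tflite::reference_ops::DepthToSpace(params, input_shape, input, output_shape,
                                      output);
  for (float v : output) std::printf("%.0f ", v);  // 1 2 3 4
  return 0;
}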

View File

@@ -1,100 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void DepthwiseConv(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& filter_shape,
const float* filter_data, const RuntimeShape& bias_shape,
const float* bias_data, const RuntimeShape& output_shape,
float* output_data) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int b = 0; b < batches; ++b) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int ic = 0; ic < input_depth; ++ic) {
for (int m = 0; m < depth_multiplier; m++) {
const int oc = m + ic * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
float total = 0.f;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
float input_value =
input_data[Offset(input_shape, b, in_y, in_x, ic)];
float filter_value = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, oc)];
total += (input_value * filter_value);
}
}
}
float bias_value = 0.0f;
if (bias_data) {
bias_value = bias_data[oc];
}
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
ActivationFunctionWithMinMax(total + bias_value,
output_activation_min,
output_activation_max);
}
}
}
}
}
}
} // end namespace reference_ops
} // end namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
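A minimal usage sketch of the float DepthwiseConv, assuming the tensorflow/lite include paths are available: with depth_multiplier = 1 and a filter the same size as the input, the single output value is just the dot product plus the bias. The quantized DepthwiseParams fields stay zero because the float path does not read them.

#include <cstdio>
#include <limits>
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"

int main() {
  tflite::DepthwiseParams params = {};
  params.stride_width = 1;
  params.stride_height = 1;
  params.dilation_width_factor = 1;
  params.dilation_height_factor = 1;
  params.padding_values.width = 0;
  params.padding_values.height = 0;
  params.depth_multiplier = 1;
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();

  const tflite::RuntimeShape input_shape({1, 2, 2, 1});   // NHWC
  const float input[4] = {1, 2, 3, 4};
  const tflite::RuntimeShape filter_shape({1, 2, 2, 1});  // [1, h, w, out_ch]
  const float filter[4] = {10, 20, 30, 40};
  const tflite::RuntimeShape bias_shape({1});
  const float bias[1] = {0.5f};
  const tflite::RuntimeShape output_shape({1, 1, 1, 1});
  float output[1] = {};

  tflite::reference_ops::DepthwiseConv(params, input_shape, input,
                                       filter_shape, filter, bias_shape, bias,
                                       output_shape, output);
  std::printf("%.1f\n", output[0]);  // 1*10 + 2*20 + 3*30 + 4*40 + 0.5 = 300.5
  return 0;
}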

View File

@@ -1,319 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
#include <algorithm>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
// Used in tests and template parameters to control which version of depthwise
// convolution is called. Primarily for reference code, and specializations
// forced in tests.
enum class DepthwiseConvImplementation {
// Run all tests against kUseStandardEntry even if also testing another
// kernel, since we need to be sure that the main DepthwiseConv() function in
// optimized_ops.h dispatches to a correctly-executing kernel.
kNone = 0, // The "default" option: use the normal
// DepthwiseConv kernel (entry) function.
kUseGenericKernel, // Forced use of generic kernel.
kUseNeon3x3, // 3x3 kernel that uses NEON when available.
kUseNeon3x3DotProduct, // 3x3 kernel that uses dot-product enabled NEON
// when available.
kUseCModel3x3DotProduct, // 3x3 kernel, reference C model that is intended
// to match overall design NEON code.
kUseUnwound3x3DotProduct, // 3x3 kernel, reference C model with unwound loops
// and some arrays.
kUseIntrinsics3x3DotProduct, // 3x3 kernel using NEON intrinsics.
};
// Category of depthwise convolution output rounding.
enum class DepthwiseConvOutputRounding {
kNone = 0, // Invalid: specific method must be specified.
kAwayFromZero, // Original method: exact halves rounded away from zero.
kUpward, // Halves towards +infinity: adds 0.5 before truncate.
// This is where a future kNearestEven would be placed.
};
// Category of depthwise convolution depth multiplication.
enum class DepthwiseConvDepthMultiplication {
kNoMultiplication = 0, // Depth multiplier = 1.
kUnitInputDepth, // Input depth = 1, output depth = depth multiplier.
};
namespace reference_ops {
namespace depthwise_conv {
template <DepthwiseConvOutputRounding output_rounding>
inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
int shift) {
TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
// Single-rounding MultiplyByQuantizedMultiplier
#if TFLITE_SINGLE_ROUNDING
template <>
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
int32_t x, int32_t quantized_multiplier, int shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
int left_shift = shift > 0 ? shift : 0;
int right_shift = shift > 0 ? 0 : -shift;
return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
x * (1 << left_shift), quantized_multiplier),
right_shift);
}
template <>
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
int32_t x, int32_t quantized_multiplier, int shift) {
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
// Double-rounding MultiplyByQuantizedMultiplier
#else
template <>
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
int32_t x, int32_t quantized_multiplier, int shift) {
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}
template <>
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
int32_t x, int32_t quantized_multiplier, int shift) {
using gemmlowp::SaturatingRoundingDoublingHighMul;
const int left_shift = shift > 0 ? shift : 0;
const int right_shift = shift > 0 ? 0 : -shift;
const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
quantized_multiplier) +
rounding_offset) >>
right_shift;
}
#endif // TFLITE_SINGLE_ROUNDING
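// Worked example (illustrative) of the final shift in the double-rounding
// kUpward specialization above: with right_shift == 3 the rounding_offset is
// 4, so an intermediate value of 12 maps to (12 + 4) >> 3 == 2 (1.5 rounded
// towards +infinity) and -12 maps to (-12 + 4) >> 3 == -1, where
// away-from-zero rounding would instead give -2.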
template <DepthwiseConvOutputRounding output_rounding>
struct DepthwiseConvBasicKernel {
static inline void Run(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int b = 0; b < batches; ++b) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int ic = 0; ic < input_depth; ++ic) {
for (int m = 0; m < depth_multiplier; m++) {
const int oc = m + ic * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x =
in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32_t input_val =
input_data[Offset(input_shape, b, in_y, in_x, ic)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, oc)];
acc += (filter_val + filter_offset) *
(input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[oc];
}
acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
static_cast<uint8_t>(acc);
}
}
}
}
}
}
// TODO(b/148596273): Reconcile reference versions, perhaps with common
// MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
static inline void RunPerChannel(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
const int32_t* output_multiplier = params.output_multiplier_per_channel;
const int32_t* output_shift = params.output_shift_per_channel;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int m = 0; m < depth_multiplier; ++m) {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x =
in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with 32 bits accumulator.
// In the nudging process during model quantization, we force the
// real value 0.0 to be represented by a quantized value. This
// guarantees that the input_offset is an int8_t, even though it is
// represented using int32_t. The accumulation is int32_t +=
// int8_t * (int8_t - int8_t), so the highest value we can get from
// each accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
// applied to a filter so the accumulation logic will hold
// as long as the filter size (filter_y * filter_x *
// in_channel) does not exceed 2^16, which is the case in
// all the models we have seen so far.
acc += filter_val * (input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[output_channel];
}
acc = DepthwiseConvRound<output_rounding>(
acc, output_multiplier[output_channel],
output_shift[output_channel]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x,
output_channel)] = static_cast<int8_t>(acc);
}
}
}
}
}
}
};
} // namespace depthwise_conv
inline void DepthwiseConv(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
return depthwise_conv::DepthwiseConvBasicKernel<
DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
input_data, filter_shape,
filter_data, bias_shape,
bias_data, output_shape,
output_data);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_

View File

@@ -1,78 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
#include <limits.h>
#include <vector>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
// Dequantizes into a float without rounding.
template <typename InputT, typename OutputT>
inline void Dequantize(const tflite::DequantizationParams& op_params,
const RuntimeShape& input_shape,
const InputT* input_data,
const RuntimeShape& output_shape, OutputT* output_data) {
int32_t zero_point = op_params.zero_point;
const double scale = op_params.scale;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
const int32_t val = input_data[i];
const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
output_data[i] = result;
}
}
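// Illustrative example (values chosen arbitrarily): with scale = 0.5 and
// zero_point = 10, a quantized input of 14 dequantizes to
// 0.5 * (14 - 10) = 2.0f.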
// Dequantizes per-channel quantized tensor to float.
template <typename T>
inline void PerChannelDequantize(
const tflite::PerChannelDequantizationParams& op_params,
const RuntimeShape& input_shape, const T* input_data,
const RuntimeShape& output_shape, float* output_data) {
// Ensure flat size is same.
MatchingFlatSize(input_shape, output_shape);
const int32_t* zero_point = op_params.zero_point;
const float* scale = op_params.scale;
const int32_t quantized_dimension = op_params.quantized_dimension;
const int32_t num_dims = input_shape.DimensionsCount();
const int32_t* dims_data = input_shape.DimsData();
std::vector<int> current_dim(num_dims, 0);
do {
size_t offset =
ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
current_dim.data(), 0, nullptr);
const int channel = current_dim[quantized_dimension];
const int32_t val = input_data[offset];
const float result =
static_cast<float>(scale[channel] * (val - zero_point[channel]));
output_data[offset] = result;
} while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
current_dim.data()));
}
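// Illustrative example: with quantized_dimension == 3 (the channel axis of an
// NHWC tensor), element (b, y, x, c) is dequantized as
// scale[c] * (value - zero_point[c]).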
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_

View File

@@ -1,247 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void DivCheckArithmeticParams(const ArithmeticParams& params) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is the negative of the input zero point. Activation tensors
// are asymmetrically quantized, so they span the full int8 range.
constexpr int32_t max_value =
static_cast<int32_t>(std::numeric_limits<T>::max());
TFLITE_DCHECK_GE(params.input1_offset, -max_value);
TFLITE_DCHECK_LE(params.input1_offset, max_value);
TFLITE_DCHECK_GE(params.input2_offset, -max_value);
TFLITE_DCHECK_LE(params.input2_offset, max_value);
TFLITE_DCHECK_GE(params.output_offset, -max_value);
TFLITE_DCHECK_LE(params.output_offset, max_value);
}
// Element-wise div that can often be used for inner loop of broadcast Div as
// well as the non-broadcast Div.
template <typename T>
inline void DivElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
DivCheckArithmeticParams<T>(params);
for (int i = 0; i < size; ++i) {
int32_t input1_val = params.input1_offset + input1_data[i];
int32_t input2_val = params.input2_offset + input2_data[i];
TFLITE_DCHECK_NE(input2_val, 0);
if (input2_val < 0) {
// Invert signs to avoid a negative input2_val as input2_inv needs to be
// positive to be used as multiplier of MultiplyByQuantizedMultiplier.
input1_val = -input1_val;
input2_val = -input2_val;
}
int recip_shift;
const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
const int headroom = CountLeadingSignBits(input1_val);
const int32_t unscaled_quotient =
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
headroom);
const int total_shift = params.output_shift - recip_shift - headroom;
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplierSmallerThanOneExp(
unscaled_quotient, params.output_multiplier, total_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<T>(clamped_output);
}
}
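// Roughly speaking, the loop above computes the real-valued quotient
// (input1_offset + input1) / (input2_offset + input2), rescales it onto the
// output scale via output_multiplier/output_shift, adds output_offset and
// clamps to the quantized activation range.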
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int8_t* input1_data,
const RuntimeShape& input2_shape, const int8_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
}
template <typename T, int N = 5>
inline void BroadcastDivSlowQuantized(
const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape,
const T* input1_data, const RuntimeShape& unextended_input2_shape,
const T* input2_data, const RuntimeShape& unextended_output_shape,
T* output_data) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
DivCheckArithmeticParams<T>(params);
auto div_func = [&](int indexes[N]) {
int32_t input1_val =
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
int32_t input2_val =
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
TFLITE_DCHECK_NE(input2_val, 0);
if (input2_val < 0) {
// Invert signs to avoid a negative input2_val as input2_inv needs to be
// positive to be used as multiplier of MultiplyByQuantizedMultiplier.
input1_val = -input1_val;
input2_val = -input2_val;
}
int recip_shift;
const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
const int headroom = CountLeadingSignBits(input1_val);
const int32_t unscaled_quotient =
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
headroom);
const int total_shift = params.output_shift - recip_shift - headroom;
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplierSmallerThanOneExp(
unscaled_quotient, params.output_multiplier, total_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[SubscriptToIndex(output_desc, indexes)] =
static_cast<T>(clamped_output);
};
NDOpsHelper<N>(output_desc, div_func);
}
template <int N = 5>
inline void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const uint8_t* input1_data,
const RuntimeShape& unextended_input2_shape,
const uint8_t* input2_data,
const RuntimeShape& unextended_output_shape,
uint8_t* output_data) {
BroadcastDivSlowQuantized<uint8_t, N>(
params, unextended_input1_shape, input1_data, unextended_input2_shape,
input2_data, unextended_output_shape, output_data);
}
template <int N = 5>
inline void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const int8_t* input1_data,
const RuntimeShape& unextended_input2_shape,
const int8_t* input2_data,
const RuntimeShape& unextended_output_shape,
int8_t* output_data) {
BroadcastDivSlowQuantized<int8_t, N>(
params, unextended_input1_shape, input1_data, unextended_input2_shape,
input2_data, unextended_output_shape, output_data);
}
// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
// dimensionality if the runtime code does a single loop over one dimension
// that handles broadcasting as the base case. The code generator would then
// generate max(D1, D2) nested for loops.
template <typename T, int N = 5>
void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const T* input1_data,
const RuntimeShape& unextended_input2_shape,
const T* input2_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
T output_activation_min;
T output_activation_max;
GetActivationParams(params, &output_activation_min, &output_activation_max);
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest
// stride, typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
auto div_func = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, indexes)] /
input2_data[SubscriptToIndex(desc2, indexes)],
output_activation_min, output_activation_max);
};
NDOpsHelper<N>(output_desc, div_func);
}
template <typename T>
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
T output_activation_min;
T output_activation_max;
GetActivationParams(params, &output_activation_min, &output_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] / input2_data[i], output_activation_min,
output_activation_max);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_

View File

@@ -1,37 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void Elu(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
output_data[i] = val < 0.0f ? TfLiteExpm1(val) : val;
}
}
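// Illustrative values: Elu(1.0f) == 1.0f, while Elu(-1.0f) ==
// expm1(-1.0f) ~= -0.632f.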
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_

View File

@@ -1,38 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_
#include <cmath>
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void Exp(const T* input_data, const size_t num_elements,
T* output_data) {
ruy::profiler::ScopeLabel label("Exp");
for (size_t idx = 0; idx < num_elements; ++idx) {
output_data[idx] = std::exp(input_data[idx]);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_

View File

@@ -1,38 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_
#include <cmath>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
void Fill(const RuntimeShape& value_shape, const T* value_data,
const RuntimeShape& output_shape, T* output_data) {
TFLITE_DCHECK_EQ(value_shape.DimensionsCount(), 0);
const int flat_size = output_shape.FlatSize();
for (int i = 0; i < flat_size; ++i) {
output_data[i] = *value_data;
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_

View File

@@ -1,39 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
#include <cmath>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void Floor(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
int offset = i;
output_data[offset] = std::floor(input_data[offset]);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_

View File

@@ -1,35 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
#include <cmath>
#include <functional>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
T FloorDiv(T input1, T input2) {
return std::floor(std::divides<double>()(static_cast<double>(input1),
static_cast<double>(input2)));
}
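// Illustrative example: FloorDiv(-7, 2) evaluates std::floor(-3.5) == -4,
// whereas plain C++ integer division would truncate towards zero to -3.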
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_

View File

@@ -1,44 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
#include <cmath>
#include <functional>
namespace tflite {
namespace reference_ops {
template <typename T>
T FloorMod(T input1, T input2) {
struct FloatMod {
float operator()(const float lhs, const float rhs) const {
return std::fmod(lhs, rhs);
}
};
using ModFunc = typename std::conditional<std::is_integral<T>::value,
std::modulus<T>, FloatMod>::type;
ModFunc mod_func;
T trunc_mod = mod_func(input1, input2);
return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0))
? (trunc_mod + input2)
: trunc_mod;
}
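// Illustrative example: FloorMod(-7, 3) first takes the truncated remainder
// -7 % 3 == -1; because -1 and 3 differ in sign, the result is -1 + 3 == 2,
// matching floored (Python-style) modulo.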
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_

View File

@@ -1,323 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
#include <algorithm>
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const float* input_data, const RuntimeShape& weights_shape,
const float* weights_data, const RuntimeShape& bias_shape,
const float* bias_data, const RuntimeShape& output_shape,
float* output_data) {
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
// TODO(b/62193649): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dims_count = output_shape.DimensionsCount();
const int weights_dims_count = weights_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
output_shape, output_dims_count - 1);
const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
float total = 0.f;
for (int d = 0; d < accum_depth; ++d) {
total += input_data[b * accum_depth + d] *
weights_data[out_c * accum_depth + d];
}
float bias_value = 0.0f;
if (bias_data) {
bias_value = bias_data[out_c];
}
output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
total + bias_value, output_activation_min, output_activation_max);
}
}
}
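// Minimal illustration (ignoring the activation clamp): with batches == 1 and
// accum_depth == 2, output[oc] is
// input[0] * weights[oc * 2 + 0] + input[1] * weights[oc * 2 + 1] + bias[oc]
// (the bias term only when bias_data is provided).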
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
// TODO(b/62193649): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dim_count = output_shape.DimensionsCount();
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
output_shape, output_dim_count - 1);
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int32_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
if (bias_data) {
acc += bias_data[out_c];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
}
}
}
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(output_offset, 0);
// TODO(b/62193649): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dim_count = output_shape.DimensionsCount();
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
output_shape, output_dim_count - 1);
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
// Internal accumulation.
// Initialize accumulator with the bias-value.
int32_t accum = bias_data[out_c];
// Accumulation loop.
for (int d = 0; d < accum_depth; ++d) {
int16_t input_val = input_data[b * accum_depth + d] + input_offset;
int16_t filter_val =
filter_data[out_c * accum_depth + d] + filter_offset;
accum += filter_val * input_val;
}
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
// multiplier and shift here have been pre-computed offline
// (e.g. by toco).
accum =
MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
// Saturate, cast to int16_t, and store to output array.
accum = std::max(accum, output_activation_min - output_offset);
accum = std::min(accum, output_activation_max - output_offset);
accum += output_offset;
output_data[out_c + output_depth * b] = accum;
}
}
}
inline void ShuffledFullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& weights_shape,
const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
// TODO(b/62193649): This really should be:
// const int batches = ArraySize(output_dims, 1);
// but the current --variable_batch hack consists in overwriting the 3rd
// dimension with the runtime batch size, as we don't keep track for each
// array of which dimension is the batch dimension in it.
const int output_dim_count = output_shape.DimensionsCount();
const int weights_dim_count = weights_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
output_shape, output_dim_count - 1);
const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
TFLITE_DCHECK((accum_depth % 16) == 0);
TFLITE_DCHECK((output_depth % 4) == 0);
// Shuffling and xoring of input activations into the workspace buffer
uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
if (batches == 1) {
for (int i = 0; i < accum_depth; i++) {
shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
}
} else if (batches == 4) {
for (int c = 0; c < accum_depth; c += 16) {
for (int b = 0; b < 4; b++) {
const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
for (int j = 0; j < 16; j++) {
uint8_t src_val = *src_data_ptr++;
// Flip the sign bit, so that the kernel will only need to
// reinterpret these uint8_t values as int8_t, getting for free the
// subtraction of the zero_point value 128.
uint8_t dst_val = src_val ^ 0x80;
*shuffled_input_workspace_ptr++ = dst_val;
}
}
}
} else {
TFLITE_DCHECK(false);
return;
}
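// Illustrative example of the XOR trick above: a uint8_t input of 5 (0x05)
// becomes 0x85, which reinterpreted as int8_t is -123, i.e. 5 - 128, so the
// zero-point subtraction comes for free in the inner loops below.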
// Actual computation
if (batches == 1) {
int16_t* output_ptr = output_data;
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
// so that just reinterpreting them as int8_t values is equivalent to
// subtracting 128 from them, thus implementing for free the subtraction of
// the zero_point value 128.
const int8_t* shuffled_weights_ptr =
reinterpret_cast<const int8_t*>(shuffled_weights_data);
// Likewise, we preshuffled and pre-xored the input data above.
const int8_t* shuffled_input_data =
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
for (int c = 0; c < output_depth; c += 4) {
// Internal accumulation.
// Accumulators start at zero; the bias is added after the accumulation loop.
int32_t accum[4] = {0};
// Accumulation loop.
for (int d = 0; d < accum_depth; d += 16) {
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 16; j++) {
int8_t input_val = shuffled_input_data[d + j];
int8_t weights_val = *shuffled_weights_ptr++;
accum[i] += weights_val * input_val;
}
}
}
for (int i = 0; i < 4; i++) {
// Add bias value
int32_t acc = accum[i] + bias_data[c + i];
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
// multiplier and shift here have been pre-computed offline
// (e.g. by toco).
acc =
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
// Saturate, cast to int16_t, and store to output array.
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_ptr[c + i] = acc;
}
}
} else if (batches == 4) {
int16_t* output_ptr = output_data;
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
// so that just reinterpreting them as int8_t values is equivalent to
// subtracting 128 from them, thus implementing for free the subtraction of
// the zero_point value 128.
const int8_t* shuffled_weights_ptr =
reinterpret_cast<const int8_t*>(shuffled_weights_data);
// Likewise, we preshuffled and pre-xored the input data above.
const int8_t* shuffled_input_data =
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
for (int c = 0; c < output_depth; c += 4) {
const int8_t* shuffled_input_ptr = shuffled_input_data;
// Internal accumulation: accumulators start at zero; the bias is added
// after the accumulation loop below.
int32_t accum[4][4];
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
accum[i][b] = 0;
}
}
for (int d = 0; d < accum_depth; d += 16) {
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
for (int j = 0; j < 16; j++) {
int8_t input_val = shuffled_input_ptr[16 * b + j];
int8_t weights_val = shuffled_weights_ptr[16 * i + j];
accum[i][b] += weights_val * input_val;
}
}
}
shuffled_input_ptr += 64;
shuffled_weights_ptr += 64;
}
for (int i = 0; i < 4; i++) {
for (int b = 0; b < 4; b++) {
// Add bias value
int32_t acc = accum[i][b] + bias_data[c + i];
// Down-scale the final int32_t accumulator to the scale used by our
// (16-bit, typically 3 integer bits) fixed-point format. The
// quantized multiplier and shift here have been pre-computed offline
// (e.g. by toco).
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
output_shift);
// Saturate, cast to int16_t, and store to output array.
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_ptr[b * output_depth + c + i] = acc;
}
}
}
} else {
TFLITE_DCHECK(false);
return;
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_

View File

@@ -1,168 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
#include <algorithm>
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline int16_t SaturatingLeftShift(int16_t value, int amount) {
int64_t result = static_cast<int64_t>(value) * (1 << amount);
result = std::min<int64_t>(result, std::numeric_limits<int16_t>::max());
result = std::max<int64_t>(result, std::numeric_limits<int16_t>::min());
return result;
}
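// Illustrative values: SaturatingLeftShift(100, 2) == 400, while
// SaturatingLeftShift(20000, 2) would be 80000 and therefore saturates to
// 32767.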
// Similar to ARM instruction SQDMULH.
// Similar to gemmlowp::SaturatingRoundingDoublingHighMul except
// rounding to zero instead of to nearest (SQRDMULH).
inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) {
bool overflow = a == b && a == std::numeric_limits<std::int16_t>::min();
std::int32_t a_32(a);
std::int32_t b_32(b);
std::int32_t ab_32 = a_32 * b_32;
std::int16_t ab_x2_high16 = static_cast<std::int16_t>((ab_32) / (1 << 15));
return overflow ? std::numeric_limits<std::int16_t>::max() : ab_x2_high16;
}
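// Illustrative Q15 example: with a == b == 16384 (0.5 in Q15), ab_32 is 2^28
// and the result is 2^28 / 2^15 == 8192, i.e. 0.25 in Q15. The only
// saturating case is a == b == -32768, which returns 32767.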
template <typename T>
inline void HardSwish(const RuntimeShape& input_shape, const T* input_data,
const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float");
auto matching_size = MatchingFlatSize(input_shape, output_shape);
const T* in_end = input_data + matching_size;
for (; input_data < in_end; input_data++, output_data++) {
const float in = *input_data;
*output_data =
in * std::min(static_cast<T>(6), std::max(static_cast<T>(0), in + 3)) /
6;
}
}
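// Illustrative values: HardSwish(-4.0f) == 0.0f, HardSwish(1.0f) ==
// 1 * 4 / 6 ~= 0.667f, and HardSwish(5.0f) == 5.0f (the inner term saturates
// at 6).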
template <typename T>
inline void HardSwish(const HardSwishParams& params,
const RuntimeShape& input_shape, const T* input_data,
const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized");
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
const int16_t input_value = input_data[i] - params.input_zero_point;
// Left-shift as much as we can without overflow/saturation to put
// significant bits in the high bits of our 16-bit fixedpoint values, so
// that fixed-point approximate computations below are as accurate as
// possible.
const int16_t input_value_on_hires_input_scale = input_value * (1 << 7);
// Compute the input value on essentially the output scale, just not
// right-shifted yet. This is the value that we'll use in the (x >= +3)
// case, and that in the general case we'll multiply against the "relu-ish"
// fixed-point multiplier in [0, 1].
const int16_t input_value_on_preshift_output_scale =
gemmlowp::SaturatingRoundingDoublingHighMul(
input_value_on_hires_input_scale,
params.output_multiplier_fixedpoint_int16);
// Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that
// is just an affine rescaling of x from [-3, 3] to [0, 1]. In the general
// case, it is just that plus saturation at the boundaries of [-3, 3].
// First, we rescale from [-3, 3] to [-1, 1], saturating.
// That is done by rescaling the input value with a fixed-point multiplier
// (reluish_multiplier_fixedpoint) and bit-shift such that we represent
// that input value on the scale where the real value 3.0f is represented
// by the quantized value 32768. (+32768 is actually not representable as
// int16_t, so this saturates at +32767, and that is seen empirically to be
// a negligible contribution to numerical error/bias).
//
// This code is careful to correctly implement any magnitude of multiplier,
// involving either a right shift or a left shift, with correct saturation
// behavior in the left-shift case. This forces this code to be more
// complicated, but is necessary for real applications: a partially
// trained quantized MobileNet v3-small model that motivated this code
// exhibits some large [min, max] range boundaries, of the order of
// magnitude of 10 or 100 depending on layers.
//
// The next few lines are basically just an ordinary
// MultiplyByQuantizedMultiplier, except that we are more careful here
// about the fine details of saturation when left-shifting, because here
// overflow in left-shift is a common case, not an anomaly as
// MultiplyByQuantizedMultiplier assumes.
int16_t reluish_value = input_value_on_hires_input_scale;
// Shift left, saturating, as much as we can while ensuring that this
// saturation will not contribute to the result. That is, left shift amount
// reduced by 1.
if (params.reluish_multiplier_exponent > 0) {
reluish_value = SaturatingLeftShift(
reluish_value, params.reluish_multiplier_exponent - 1);
}
// Apply the fixed-point multiplier, dividing the value by a divisor
// ranging in [1, 2].
reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul(
reluish_value, params.reluish_multiplier_fixedpoint_int16);
// Apply the last bit of left-shift. Thus, in the left-shifting case, if
// any saturation affects the result, it is happening here --- any
// saturation having occurred above is overwritten here, not affecting the
// result.
if (params.reluish_multiplier_exponent > 0) {
reluish_value = SaturatingLeftShift(reluish_value, 1);
}
// Shift right, in the right-shifting case.
if (params.reluish_multiplier_exponent < 0) {
reluish_value = gemmlowp::RoundingDivideByPOT(
reluish_value, -params.reluish_multiplier_exponent);
}
// At this point we have rescaled the value into a 16bit fixedpoint
// reluish_value in [-1, 1].
// We now convert that to a 16bit fixedpoint value in [0, 1].
reluish_value = (reluish_value + (1 << 15)) >> 1;
// Use of SaturatingDoublingHighMul here is important to cancel the biases
// from the above SaturatingRoundingDoublingHighMul.
//
// On a partially trained MobileNet-v3-small,
//
// | bias on | ImageNet
// | quantized | Top-1
// Operation used here | values | accuracy (50k)
// --------------------------------------+------------+-----------
// SaturatingDoublingHighMul | -0.0024 | 58.920
// SaturatingRoundingDoublingHighMul | -0.0067 | 58.064
//
// In activations_test, this is covered by this testcase:
// QuantizedActivationsOpTest.HardSwishBias
//
const int16_t preshift_output_value = SaturatingDoublingHighMul(
reluish_value, input_value_on_preshift_output_scale);
// We were so far operating on the pre-shift output scale. Now we finally
// apply that output shift, arriving at the final output scale.
int16_t output_value = gemmlowp::RoundingDivideByPOT(
preshift_output_value, -params.output_multiplier_exponent);
output_value += params.output_zero_point;
output_value =
std::min<int16_t>(output_value, std::numeric_limits<T>::max());
output_value =
std::max<int16_t>(output_value, std::numeric_limits<T>::min());
output_data[i] = output_value;
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_

View File

@@ -1,145 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_integer_ops {
inline void CheckArithmeticParams(const ArithmeticParams& params) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is the negative of the input zero point. Activation tensors
// are asymmetrically quantized, so they span the full int8 range.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
}
inline void ElementWise(
int size, const ArithmeticParams& params, const int8_t* input1_data,
const int8_t* input2_data, int8_t* output_data,
void (*check_arithmetic_params)(const ArithmeticParams&),
int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
CheckArithmeticParams(params);
for (int i = 0; i < size; ++i) {
output_data[i] = binary_func(input1_data[i], input2_data[i], params);
}
}
inline void BroadcastBinaryFunction4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const int8_t* input1_data, const RuntimeShape& input2_shape,
const int8_t* input2_data, const RuntimeShape& output_shape,
int8_t* output_data,
void (*check_arithmetic_params)(const ArithmeticParams&),
int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func(
input1_data[SubscriptToIndex(desc1, b, y, x, c)],
input2_data[SubscriptToIndex(desc2, b, y, x, c)], params);
}
}
}
}
}
inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) {
const int32_t input1_val = params.input1_offset + x;
const int32_t input2_val = params.input2_offset + y;
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
return static_cast<int8_t>(clamped_output);
}
// Element-wise add that can often be used for the inner loop of a broadcast
// add as well as for the non-broadcast add.
inline void AddElementwise(int size, const ArithmeticParams& params,
const int8_t* input1_data, const int8_t* input2_data,
int8_t* output_data) {
ElementWise(size, params, input1_data, input2_data, output_data,
CheckArithmeticParams, AddFunc);
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int8_t* input1_data,
const RuntimeShape& input2_shape, const int8_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
CheckArithmeticParams(params);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int8_t* input1_data,
const RuntimeShape& input2_shape,
const int8_t* input2_data,
const RuntimeShape& output_shape,
int8_t* output_data) {
BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data,
CheckArithmeticParams, AddFunc);
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
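
For reference, a minimal standalone sketch in plain C++ of what the integer Add above computes, using the quantization relation r = s * (q - Z). The scales, zero points and inputs below are assumed example values, and a float requantization stands in for the fixed-point offset/left_shift/multiplier pipeline of AddFunc:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed quantization parameters for the two inputs and the output.
  const float s1 = 0.05f, s2 = 0.10f, s_out = 0.10f;  // scales
  const int32_t z1 = -3, z2 = 5, z_out = 0;           // zero points
  const int8_t q1 = 17, q2 = 42;                      // quantized inputs
  // Dequantize, add in real space, requantize; the fixed-point pipeline in
  // AddFunc approximates exactly this computation.
  const float r = s1 * (q1 - z1) + s2 * (q2 - z2);
  int32_t q = static_cast<int32_t>(std::lround(r / s_out)) + z_out;
  q = std::min<int32_t>(127, std::max<int32_t>(-128, q));  // activation clamp
  std::printf("real sum = %.3f -> quantized output = %d\n", r, static_cast<int>(q));
  return 0;
}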

View File

@@ -1,238 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
// Fixed-point per-channel-quantization convolution reference kernel.
inline void ConvPerChannel(
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
const int32_t input_offset = params.input_offset; // r = s(q - Z)
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32_t output_offset = params.output_offset;
// Set min and max value of the output.
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Consistency check.
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
// Check dimensions of the tensors.
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
const int in_y_origin = (out_y * stride_height) - pad_height;
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (!is_point_inside_image) {
continue;
}
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
int32_t input_val =
input_data[Offset(input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
int32_t filter_val = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
// Accumulate with a 32-bit accumulator.
// In the nudging process during model quantization, we force the
// real value 0.0 to be exactly representable by a quantized value.
// This guarantees that the input_offset fits in an int8_t, even
// though it is represented using int32_t. int32_t += int8_t *
// (int8_t - int8_t), so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
// applied to a filter, so the accumulation logic will hold as
// long as the filter size (filter_y * filter_x * in_channel)
// does not exceed 2^16, which is the case in all the models
// we have seen so far.
// TODO(b/174275578): Add a check to make sure the
// accumulator depth is smaller than 2^16.
acc += filter_val * (input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[out_channel];
}
acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[out_channel], output_shift[out_channel]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<int8_t>(acc);
}
}
}
}
}
// Fixed-point per-channel-quantization convolution reference kernel.
// 16-bit data and 8-bit filter
template <typename AccumScalar>
inline void ConvPerChannel(
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const AccumScalar* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
// Get parameters.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
// Set min and max value of the output.
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Consistency check.
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = input_shape.Dims(3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
// Check dimensions of the tensors.
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int filter_input_depth = filter_shape.Dims(3);
const int groups = input_depth / filter_input_depth;
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
const int filters_per_group = output_depth / groups;
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
const int in_y_origin = (out_y * stride_height) - pad_height;
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
auto group = out_channel / filters_per_group;
AccumScalar acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (!is_point_inside_image) {
continue;
}
for (int in_channel = 0; in_channel < filter_input_depth;
++in_channel) {
int32_t input_val =
input_data[Offset(input_shape, batch, in_y, in_x,
in_channel + group * filter_input_depth)];
int32_t filter_val = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
// Accumulate with a 64-bit accumulator (AccumScalar is
// typically int64_t). Each term is int8_t * int16_t, so the
// largest magnitude of a single product is
// 127 * 32768 = 4161536 (log2 ~= 22), which leaves ample
// headroom for the accumulation.
acc += filter_val * input_val;
}
}
}
if (bias_data) {
acc += bias_data[out_channel];
}
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[out_channel], output_shift[out_channel]);
scaled_acc = std::max(scaled_acc, output_activation_min);
scaled_acc = std::min(scaled_acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<int16_t>(scaled_acc);
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
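
As a reading aid, a standalone sketch of the inner accumulation of ConvPerChannel for a single output element. The 3x3 patch, filter, bias, offsets and effective scale are assumed values, and a float rescale stands in for MultiplyByQuantizedMultiplier:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const int32_t input_offset = 128;  // -input_zero_point (assumed)
  const int8_t input[9]  = {-12, 4, 7, 99, -128, 33, 0, 25, -64};  // 3x3 patch
  const int8_t filter[9] = {1, -2, 3, -4, 5, -6, 7, -8, 9};        // 3x3 filter
  const int32_t bias = 1024;
  const float effective_scale = 0.05f;  // input_scale * filter_scale / output_scale
  const int32_t output_offset = 4;

  int32_t acc = 0;  // 32-bit accumulator, as in the kernel above
  for (int i = 0; i < 9; ++i) {
    acc += static_cast<int32_t>(filter[i]) *
           (static_cast<int32_t>(input[i]) + input_offset);
  }
  acc += bias;
  int32_t out =
      static_cast<int32_t>(std::lround(acc * effective_scale)) + output_offset;
  out = std::min<int32_t>(127, std::max<int32_t>(-128, out));
  std::printf("acc = %d -> int8 output = %d\n", static_cast<int>(acc),
              static_cast<int>(out));
  return 0;
}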

View File

@@ -1,291 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void DepthwiseConvPerChannel(
const DepthwiseParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int m = 0; m < depth_multiplier; ++m) {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with a 32-bit accumulator.
// In the nudging process during model quantization, we force the
// real value 0.0 to be exactly representable by a quantized value.
// This guarantees that the input_offset fits in an int8_t, even
// though it is represented using int32_t. int32_t += int8_t *
// (int8_t - int8_t), so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
// applied to a filter, so the accumulation logic will hold as
// long as the filter size (filter_y * filter_x * in_channel)
// does not exceed 2^16, which is the case in all the models
// we have seen so far.
// TODO(b/174275578): Add a check to make sure the
// accumulator depth is smaller than 2^16.
acc += filter_val * (input_val + input_offset);
}
}
}
if (bias_data) {
acc += bias_data[output_channel];
}
acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[output_channel],
output_shift[output_channel]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x,
output_channel)] = static_cast<int8_t>(acc);
}
}
}
}
}
}
inline void DepthwiseConvPerChannel(
const DepthwiseParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const std::int64_t* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
// Get parameters.
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int m = 0; m < depth_multiplier; ++m) {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
std::int64_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with a 64-bit accumulator.
// We assume a maximum of 2^16 accumulations, as in the
// 8-bit case, so the value in the accumulator should not
// exceed 40 bits.
acc += static_cast<int64_t>(filter_val) *
static_cast<int64_t>(input_val);
}
}
}
if (bias_data) {
acc += bias_data[output_channel];
}
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[output_channel],
output_shift[output_channel]);
scaled_acc = std::max(scaled_acc, output_activation_min);
scaled_acc = std::min(scaled_acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x,
output_channel)] =
static_cast<int16_t>(scaled_acc);
}
}
}
}
}
}
inline void DepthwiseConvHybridPerChannel(
const DepthwiseParams& params, float* scaling_factors_ptr,
const RuntimeShape& input_shape, const int8_t* input_data,
const RuntimeShape& filter_shape, const int8_t* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data,
const float* per_channel_scale, int32_t* input_offset) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const float output_activation_min = params.float_activation_min;
const float output_activation_max = params.float_activation_max;
// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int input_depth = input_shape.Dims(3);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int bias_depth = bias_shape.FlatSize();
TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
TFLITE_DCHECK_EQ(bias_depth, output_depth);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
for (int m = 0; m < depth_multiplier; ++m) {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
acc += filter_val * (input_val - input_offset[batch]);
}
}
}
float acc_float = static_cast<float>(acc);
acc_float *=
per_channel_scale[output_channel] * scaling_factors_ptr[batch];
if (bias_data && output_channel < bias_depth) {
acc_float += bias_data[output_channel];
}
output_data[Offset(output_shape, batch, out_y, out_x,
output_channel)] =
ActivationFunctionWithMinMax(acc_float, output_activation_min,
output_activation_max);
}
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
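
A short sketch of the channel mapping that distinguishes DepthwiseConvPerChannel from the regular convolution above: each input channel produces depth_multiplier outputs, and the filter (whose first dimension is 1) is indexed by the resulting output channel. The sizes below are assumed:

#include <cstdio>

int main() {
  const int input_depth = 3;       // assumed
  const int depth_multiplier = 2;  // assumed
  for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
    for (int m = 0; m < depth_multiplier; ++m) {
      // Same mapping as in the kernels above.
      const int output_channel = m + in_channel * depth_multiplier;
      std::printf("input channel %d, multiplier index %d -> output channel %d\n",
                  in_channel, m, output_channel);
    }
  }
  return 0;
}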

View File

@@ -1,201 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
// For per-channel functions, the quantization spec defines weights as
// symmetric
// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric),
// so zero_point (params.weights_offset) is always 0.
// However, for per-tensor functions, params.weights_offset is still applied
// for backward compatibility.
inline void FullyConnectedPerChannel(
const FullyConnectedParams& params, const int32_t* output_multiplier,
const int* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = output_shape.Dims(0);
const int output_depth = output_shape.Dims(1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int32_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += filter_val * (input_val + input_offset);
}
if (bias_data) {
acc += bias_data[out_c];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
output_shift[out_c]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
}
}
}
template <typename AccumScalar>
inline void FullyConnectedPerChannel(
const FullyConnectedParams& params, const int32_t* output_multiplier,
const int* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const AccumScalar* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();
const int output_dim_count = output_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = output_shape.Dims(output_dim_count - 1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
AccumScalar acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += filter_val * input_val;
}
if (bias_data) {
acc += bias_data[out_c];
}
int32_t acc_scaled = MultiplyByQuantizedMultiplier(
acc, output_multiplier[out_c], output_shift[out_c]);
acc_scaled = std::max(acc_scaled, output_activation_min);
acc_scaled = std::min(acc_scaled, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
}
}
}
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();
const int output_dim_count = output_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = output_shape.Dims(output_dim_count - 1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
int32_t acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * (input_val + input_offset);
}
if (bias_data) {
acc += bias_data[out_c];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
}
}
}
template <typename AccumScalar>
inline void FullyConnected(
const FullyConnectedParams& params, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const AccumScalar* bias_data, const RuntimeShape& output_shape,
int16_t* output_data) {
const int32_t filter_offset = params.weights_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int filter_dim_count = filter_shape.DimensionsCount();
const int output_dim_count = output_shape.DimensionsCount();
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
const int output_depth = output_shape.Dims(output_dim_count - 1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
for (int b = 0; b < batches; ++b) {
for (int out_c = 0; out_c < output_depth; ++out_c) {
AccumScalar acc = 0;
for (int d = 0; d < accum_depth; ++d) {
int32_t input_val = input_data[b * accum_depth + d];
int32_t filter_val = filter_data[out_c * accum_depth + d];
acc += (filter_val + filter_offset) * input_val;
}
if (bias_data) {
acc += bias_data[out_c];
}
int32_t acc_scaled =
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
acc_scaled = std::max(acc_scaled, output_activation_min);
acc_scaled = std::min(acc_scaled, output_activation_max);
output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
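
A standalone sketch of the per-output-channel dot product in the int8 FullyConnected path, with assumed sizes and values and a float rescale in place of MultiplyByQuantizedMultiplier:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const int accum_depth = 4, output_depth = 2;  // assumed
  const int8_t input[4] = {10, -20, 30, -40};
  const int8_t filter[2][4] = {{1, 2, 3, 4}, {-4, -3, -2, -1}};
  const int32_t bias[2] = {100, -100};
  const int32_t input_offset = 5, output_offset = 0;  // assumed
  const float effective_scale = 0.01f;                // per-tensor, for brevity

  for (int out_c = 0; out_c < output_depth; ++out_c) {
    int32_t acc = 0;
    for (int d = 0; d < accum_depth; ++d) {
      acc += static_cast<int32_t>(filter[out_c][d]) *
             (static_cast<int32_t>(input[d]) + input_offset);
    }
    acc += bias[out_c];
    int32_t out =
        static_cast<int32_t>(std::lround(acc * effective_scale)) + output_offset;
    out = std::min<int32_t>(127, std::max<int32_t>(-128, out));
    std::printf("out[%d] = %d\n", out_c, static_cast<int>(out));
  }
  return 0;
}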

View File

@@ -1,67 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
int32_t depth, const int8_t* input_data,
int8_t* output_data) {
static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
// The output scale must be in sync with Prepare().
// Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
// to [-1, 127/128].
static constexpr int32_t kOutputScale = 7;
for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
// int32_t = (int8_t - int8_t) ^ 2.
// ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
// safe from overflowing in at least 2^16 steps.
int32_t acc = 0;
for (int inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input =
input_data[depth * outer_index + inner_index] - input_zero_point;
acc += input * input;
}
int32_t inv_l2norm_multiplier;
int inv_l2norm_shift;
GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
&inv_l2norm_shift);
for (int inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input =
input_data[depth * outer_index + inner_index] - input_zero_point;
// Rescale and downcast. Rescale is folded into the division.
int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
output_in_q24 =
std::min(static_cast<int32_t>(kMaxInt8),
std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
output_data[depth * outer_index + inner_index] =
static_cast<int8_t>(output_in_q24);
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
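
A float sketch of what the integer L2Normalization above approximates; since the output scale is 1/128, the stored int8 value is roughly round(128 * x / ||x||). The input values and zero point are assumed:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const int32_t input_zero_point = 3;       // assumed
  const int8_t input[4] = {13, -7, 23, 3};  // one inner vector (depth = 4)
  float sum_sq = 0.0f;
  for (int8_t v : input) {
    const float x = static_cast<float>(v - input_zero_point);
    sum_sq += x * x;
  }
  const float inv_l2 = 1.0f / std::sqrt(sum_sq);
  for (int8_t v : input) {
    const float x = static_cast<float>(v - input_zero_point);
    int32_t q = static_cast<int32_t>(std::lround(x * inv_l2 * 128.0f));  // 1/128 scale
    q = std::min<int32_t>(127, std::max<int32_t>(-128, q));
    std::printf("%d ", static_cast<int>(q));
  }
  std::printf("\n");
  return 0;
}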

View File

@@ -1,121 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
int32_t input_multiplier, int32_t input_left_shift,
int32_t input_size, const int8_t* input_data,
int8_t* output_data) {
// Integer bits must be in sync with Prepare() function.
static constexpr int32_t kInputIntegerBits = 4;
static constexpr int32_t kOutputIntegerBits = 8;
static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
static constexpr int32_t kOutputZeroPoint = -128;
for (int i = 0; i < input_size; ++i) {
const int32_t input =
static_cast<int32_t>(input_data[i]) - input_zero_point;
if (input <= -input_range_radius) {
output_data[i] = kMinInt8;
} else if (input >= input_range_radius) {
output_data[i] = kMaxInt8;
} else {
const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
input, input_multiplier, input_left_shift);
using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
const int32_t output_in_q0 =
gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();
// Rescale and downcast.
using gemmlowp::RoundingDivideByPOT;
int32_t output_in_q23 =
RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
static_cast<int32_t>(kMinInt8)),
static_cast<int32_t>(kMaxInt8));
output_data[i] = static_cast<int8_t>(output_in_q23);
}
}
}
inline void Logistic(int32_t input_multiplier, int32_t input_left_shift,
int32_t input_size, const int16_t* ptr_input_data,
int16_t* ptr_output_data) {
// We use the sigmoid LUT and take into account that
// tanh(x) = 2*sigmoid(2*x) - 1.
// We scale by 3/4 to expand the range [-8,8] -> [-10.7,10.7].
// For a general parameter scale, the multiplier 3 is taken into account
// in the TanhPrepare function and is already included in
// input_multiplier.
TFLITE_DCHECK_GE(input_left_shift, 0);
if (input_multiplier == 0) { // power of two case
input_multiplier = 3 << input_left_shift;
input_left_shift = 0;
}
int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
int32_t input_data =
((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
// We do interpolation on unsigned values.
uint32_t abs_input_data = abs(input_data);
// We divide by 2^9 because we need to divide by 2^7 for
// the input conversion plus another factor of 4 (the 1/4
// from the scaling above).
// uh is defined as uint32_t so that this function does not overflow.
uint32_t uh = abs_input_data >> 9;
uint32_t result;
if (uh >= 255) {
// Saturate to maximum.
result = 0x7FFF << 10;
} else {
uint32_t ua = sigmoid_table_uint16[uh];
uint32_t ub = sigmoid_table_uint16[uh + 1];
uint32_t ut = abs_input_data & 0x1ff;
// Interpolation is done using the fractional bit.
result = (ua << 9) + ut * (ub - ua);
}
result = (input_data >= 0) ? (result + (1 << 9))
: ((1 << (16 + 9)) - result + (1 << 9) - 1);
// Back to 16-bit.
result >>= 10;
*ptr_output_data = result;
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
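
The int8 Logistic above targets an output with scale 1/256 and zero point -128, i.e. round(sigmoid(x) * 256) - 128 saturated to int8; the kernel reaches that result with gemmlowp fixed-point arithmetic (and, for int16, a uint16 LUT) instead of calling exp. A float sketch with assumed input quantization:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const float input_scale = 0.1f;      // assumed
  const int32_t input_zero_point = 0;  // assumed
  const int8_t inputs[5] = {-80, -10, 0, 10, 80};
  for (int8_t q_in : inputs) {
    const float x = input_scale * (q_in - input_zero_point);
    const float y = 1.0f / (1.0f + std::exp(-x));                         // sigmoid
    int32_t q_out = static_cast<int32_t>(std::lround(y * 256.0f)) - 128;  // scale 1/256, zp -128
    q_out = std::min<int32_t>(127, std::max<int32_t>(-128, q_out));
    std::printf("x = %+.1f  sigmoid = %.4f  int8 = %d\n", x, y,
                static_cast<int>(q_out));
  }
  return 0;
}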

View File

@@ -1,79 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
template <typename integer_type>
inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
int32_t shift, const RuntimeShape& unextended_input_shape,
const integer_type* input_data, int32_t input_zero_point,
const RuntimeShape& unextended_output_shape,
integer_type* output_data, int32_t output_zero_point) {
// Current implementation only supports dimension equals 4 and simultaneous
// reduction over width and height.
TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int output_batch = output_shape.Dims(0);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int output_depth = output_shape.Dims(3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int num_elements_in_axis = input_width * input_height;
TFLITE_CHECK_EQ(op_params.axis_count, 2);
TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
TFLITE_CHECK_EQ(output_height, 1);
TFLITE_CHECK_EQ(output_width, 1);
static constexpr int32_t kMinInt = std::numeric_limits<integer_type>::min();
static constexpr int32_t kMaxInt = std::numeric_limits<integer_type>::max();
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
int32_t acc = 0;
for (int in_h = 0; in_h < input_height; ++in_h) {
for (int in_w = 0; in_w < input_width; ++in_w) {
acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] -
input_zero_point;
}
}
acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
: (acc - num_elements_in_axis / 2) / num_elements_in_axis;
acc += output_zero_point;
acc = std::min(std::max(acc, kMinInt), kMaxInt);
output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
static_cast<integer_type>(acc);
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
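
The averaging step in Mean above stays entirely in integer arithmetic; a tiny sketch of the round-half-away-from-zero division it uses (example values assumed):

#include <cstdint>
#include <cstdio>

// Same rounding as in Mean: round the quotient half away from zero.
int32_t RoundedDiv(int32_t acc, int32_t num_elements) {
  return acc > 0 ? (acc + num_elements / 2) / num_elements
                 : (acc - num_elements / 2) / num_elements;
}

int main() {
  std::printf("%d %d %d\n",
              static_cast<int>(RoundedDiv(10, 4)),   // 3
              static_cast<int>(RoundedDiv(-10, 4)),  // -3
              static_cast<int>(RoundedDiv(9, 4)));   // 2
  return 0;
}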

View File

@@ -1,133 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
#include <algorithm>
#include "fixedpoint/fixedpoint.h"
#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
template <typename T>
inline void MulElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<T>(clamped_output);
}
}
template <typename T>
inline void Mul(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
ruy::profiler::ScopeLabel label("Mul/8bit");
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}
// Mul with 16 bit inputs and int8_t outputs.
inline void Mul(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
ruy::profiler::ScopeLabel label("Mul/Int16Int8");
int32_t output_offset = params.output_offset;
int32_t output_activation_min = params.quantized_activation_min;
int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
// F0 uses 0 integer bits, range [-1, 1].
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
F0 unclamped_result =
F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
int16_t rescaled_result =
gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
int16_t clamped_result = std::min<int16_t>(
output_activation_max - output_offset, rescaled_result);
clamped_result = std::max<int16_t>(output_activation_min - output_offset,
clamped_result);
output_data[i] = output_offset + clamped_result;
}
}
template <typename T>
inline void BroadcastMul4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
// The input shapes are extended as part of NdArrayDesc initialization.
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplier(input1_val * input2_val,
params.output_multiplier,
params.output_shift);
const int32_t clamped_output = std::min(
params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<T>(clamped_output);
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
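
A float sketch of the per-element computation in MulElementwise above: the real product s1*(q1-z1) * s2*(q2-z2) is requantized with the effective scale s1*s2/s_out, which the integer kernel folds into output_multiplier and output_shift. All values below are assumed:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const float s1 = 0.02f, s2 = 0.03f, s_out = 0.05f;  // assumed scales
  const int32_t z1 = 1, z2 = -2, z_out = 3;           // assumed zero points
  const int8_t q1 = 50, q2 = 40;                      // quantized inputs
  const int32_t raw =
      static_cast<int32_t>(q1 - z1) * static_cast<int32_t>(q2 - z2);
  int32_t q = static_cast<int32_t>(std::lround(raw * (s1 * s2 / s_out))) + z_out;
  q = std::min<int32_t>(127, std::max<int32_t>(-128, q));
  std::printf("raw product = %d -> int8 output = %d\n",
              static_cast<int>(raw), static_cast<int>(q));
  return 0;
}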

View File

@@ -1,264 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int32_t acc = 0;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
acc +=
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
filter_count++;
}
}
if (filter_count == 0) return false;
// Round to the closest integer value.
acc = acc > 0 ? (acc + filter_count / 2) / filter_count
: (acc - filter_count / 2) / filter_count;
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int8_t>(acc);
}
}
}
}
return true;
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_GE(params.quantized_activation_min,
std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(params.quantized_activation_max,
std::numeric_limits<int8_t>::max());
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int8_t max = std::numeric_limits<int8_t>::lowest();
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
max = std::max(
max,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
}
}
max = std::max<int8_t>(max, params.quantized_activation_min);
max = std::min<int8_t>(max, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int8_t>(max);
}
}
}
}
}
inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int16_t* input_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int32_t acc = 0;
int filter_count = 0;
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
acc +=
input_data[Offset(input_shape, batch, in_y, in_x, channel)];
filter_count++;
}
}
if (filter_count == 0) return false;
// Round to the closest integer value.
acc = acc > 0 ? (acc + filter_count / 2) / filter_count
: (acc - filter_count / 2) / filter_count;
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int16_t>(acc);
}
}
}
}
return true;
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
TFLITE_DCHECK_GE(params.quantized_activation_min,
std::numeric_limits<int16_t>::min());
TFLITE_DCHECK_LE(params.quantized_activation_max,
std::numeric_limits<int16_t>::max());
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int stride_height = params.stride_height;
const int stride_width = params.stride_width;
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int channel = 0; channel < depth; ++channel) {
const int in_x_origin =
(out_x * stride_width) - params.padding_values.width;
const int in_y_origin =
(out_y * stride_height) - params.padding_values.height;
// Compute the boundaries of the filter region clamped so as to
// ensure that the filter window fits in the input array.
const int filter_x_start = std::max(0, -in_x_origin);
const int filter_x_end =
std::min(params.filter_width, input_width - in_x_origin);
const int filter_y_start = std::max(0, -in_y_origin);
const int filter_y_end =
std::min(params.filter_height, input_height - in_y_origin);
int16_t max = std::numeric_limits<int16_t>::lowest();
for (int filter_y = filter_y_start; filter_y < filter_y_end;
++filter_y) {
for (int filter_x = filter_x_start; filter_x < filter_x_end;
++filter_x) {
const int in_x = in_x_origin + filter_x;
const int in_y = in_y_origin + filter_y;
max = std::max(
max,
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
}
}
max = std::max<int16_t>(max, params.quantized_activation_min);
max = std::min<int16_t>(max, params.quantized_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
static_cast<int16_t>(max);
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
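
The int16 pooling kernels above take their window, stride, and padding through PoolParams and index tensors in NHWC order via Offset(). Below is a minimal, illustrative invocation of the MaxPool overload; it is not part of the removed file, the include path is inferred from the header guard, the wrapper function name is made up, and the RuntimeShape initializer-list constructor is assumed to be available.

#include <cstdint>
#include <limits>
#include <vector>

// Include path inferred from the header guard above (assumption).
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"

void MaxPoolInt16Sketch() {  // illustrative wrapper, not a TFLite API
  tflite::PoolParams params{};
  params.stride_height = 2;
  params.stride_width = 2;
  params.filter_height = 2;
  params.filter_width = 2;
  params.padding_values.height = 0;
  params.padding_values.width = 0;
  params.quantized_activation_min = std::numeric_limits<int16_t>::min();
  params.quantized_activation_max = std::numeric_limits<int16_t>::max();

  const tflite::RuntimeShape input_shape({1, 4, 4, 1});   // NHWC
  const tflite::RuntimeShape output_shape({1, 2, 2, 1});  // 2x2 window, stride 2
  std::vector<int16_t> input(input_shape.FlatSize(), 0);
  std::vector<int16_t> output(output_shape.FlatSize(), 0);

  // Each output element is the window maximum, clamped to the activation range.
  tflite::reference_integer_ops::MaxPool(params, input_shape, input.data(),
                                         output_shape, output.data());
}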

View File

@@ -1,117 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
#include <algorithm>
#include <limits>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
int32_t input_multiplier, int32_t input_shift,
const RuntimeShape& input_shape, const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
// Integer bits must be in sync with Prepare() function.
static constexpr int32_t kInputIntegerBits = 4;
static constexpr int32_t kOutputScale = 7;
static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t input =
static_cast<int32_t>(input_data[i]) - input_zero_point;
if (input <= -input_range_radius) {
output_data[i] = kMinInt8;
} else if (input >= input_range_radius) {
output_data[i] = kMaxInt8;
} else {
const int32_t input_in_q4 =
MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
const int32_t output_in_q0 =
gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
// Rescale and downcast.
using gemmlowp::RoundingDivideByPOT;
int32_t output_in_q24 =
RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
output_data[i] = static_cast<int8_t>(output_in_q24);
}
}
}
inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
const RuntimeShape& input_shape, const int16_t* ptr_input_data,
const RuntimeShape& output_shape, int16_t* ptr_output_data) {
// We reuse the sigmoid LUT and rely on the identity
// tanh(x) = 2*sigmoid(2*x) - 1.
// We scale by 3/4 to expand the range [-8,8] -> [-10.7,10.7].
// For a general parameter scale, the factor of 3 is already folded into
// input_multiplier by the TanhPrepare function.
if (input_multiplier == 0) { // power of two case
input_multiplier = 3 << input_left_shift;
input_left_shift = 0;
}
int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) {
int32_t input_data =
((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
uint32_t abs_input_data = abs(input_data);
uint32_t uh = abs_input_data >> 8;
int32_t result;
if (uh >= 255) {
// Saturate to maximum.
result = 0xFFFF << 8;
} else {
uint32_t ua = sigmoid_table_uint16[uh];
uint32_t ub = sigmoid_table_uint16[uh + 1];
uint8_t ut = abs_input_data & 0xFF;
result = (ua << 8) + ut * (ub - ua);
}
result = (input_data >= 0)
? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
: (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
// Convert back to 16-bit.
result >>= (9 - 1);
*ptr_output_data = result;
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
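
The int16 Tanh above leans on the identity tanh(x) = 2*sigmoid(2*x) - 1 so it can reuse the sigmoid LUT. A quick standalone check of that identity is sketched below; it is plain C++ with no TFLite dependencies and is purely illustrative.

#include <cmath>
#include <cstdio>

int main() {
  // tanh(x) and 2*sigmoid(2*x) - 1 should agree to floating-point precision.
  for (float x = -4.0f; x <= 4.0f; x += 0.5f) {
    const float via_sigmoid = 2.0f / (1.0f + std::exp(-2.0f * x)) - 1.0f;
    std::printf("x=%+.2f  tanh=%+.6f  2*sigmoid(2x)-1=%+.6f\n", x,
                std::tanh(x), via_sigmoid);
  }
  return 0;
}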

View File

@@ -1,224 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_integer_ops {
// Fixed-point per-channel-quantization transpose convolution reference kernel.
inline void TransposeConv(
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
int32_t* scratch_buffer) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = std::numeric_limits<int8_t>::min();
const int32_t output_activation_max = std::numeric_limits<int8_t>::max();
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int num_elements = output_shape.FlatSize();
// We need to initialize scratch_buffer to all 0s, as we apply the same
// scatter-based trick as in the float version.
memset(scratch_buffer, 0, num_elements * sizeof(int32_t));
// Loop through input elements one at a time.
for (int batch = 0; batch < batches; ++batch) {
for (int in_y = 0; in_y < input_height; ++in_y) {
for (int in_x = 0; in_x < input_width; ++in_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
// Loop through the output elements it will influence.
const int out_x_origin = (in_x * stride_width) - pad_width;
const int out_y_origin = (in_y * stride_height) - pad_height;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int out_channel = 0; out_channel < output_depth;
++out_channel) {
// Compute output element location.
const int out_x = out_x_origin + filter_x;
const int out_y = out_y_origin + filter_y;
// We cannot accumulate out of bounds.
if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
(out_y < output_height)) {
const int8_t input_value = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
const int8_t filter_value =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
scratch_buffer[Offset(output_shape, batch, out_y, out_x,
out_channel)] +=
(input_value + input_offset) * filter_value;
}
}
}
}
}
}
}
}
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
out_channel)];
if (bias_data) {
acc += bias_data[out_channel];
}
acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[out_channel], output_shift[out_channel]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<int8_t>(acc);
}
}
}
}
}
// int16_t input (zero_point=0), int8_t filter, int32 or int64 accumulator
template <typename Scalar>
inline void TransposeConv(
const ConvParams& params, const int32_t* output_multiplier,
const int32_t* output_shift, const RuntimeShape& input_shape,
const int16_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const Scalar* bias_data, const RuntimeShape& output_shape,
int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
Scalar* scratch_buffer) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data) {
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
}
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int32_t output_activation_min = std::numeric_limits<int16_t>::min();
const int32_t output_activation_max = std::numeric_limits<int16_t>::max();
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
const int num_elements = output_shape.FlatSize();
// We need to initialize scratch_buffer to all 0s, as we apply the same
// scatter-based trick as in the float version.
memset(scratch_buffer, 0, num_elements * sizeof(Scalar));
// Loop through input elements one at a time.
for (int batch = 0; batch < batches; ++batch) {
for (int in_y = 0; in_y < input_height; ++in_y) {
for (int in_x = 0; in_x < input_width; ++in_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
// Loop through the output elements it will influence.
const int out_x_origin = (in_x * stride_width) - pad_width;
const int out_y_origin = (in_y * stride_height) - pad_height;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int out_channel = 0; out_channel < output_depth;
++out_channel) {
// Compute output element location.
const int out_x = out_x_origin + filter_x;
const int out_y = out_y_origin + filter_y;
// We cannot accumulate out of bounds.
if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) &&
(out_y < output_height)) {
const int32_t input_value = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
const int32_t filter_value =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
scratch_buffer[Offset(output_shape, batch, out_y, out_x,
out_channel)] +=
input_value * filter_value;
}
}
}
}
}
}
}
}
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
for (int out_x = 0; out_x < output_width; ++out_x) {
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
Scalar acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
out_channel)];
if (bias_data) {
acc += bias_data[out_channel];
}
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
acc, output_multiplier[out_channel], output_shift[out_channel]);
scaled_acc = std::max(scaled_acc, output_activation_min);
scaled_acc = std::min(scaled_acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<int16_t>(scaled_acc);
}
}
}
}
}
} // namespace reference_integer_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
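
Both TransposeConv overloads above scatter each input element into every output position it influences, accumulating into a zero-initialized scratch_buffer before the per-channel requantization pass. The 1-D toy below shows the same scatter-accumulate pattern in isolation; it is illustrative only and has no TFLite dependencies.

#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> input = {1, 2, 3};
  const std::vector<int> filter = {1, 1};  // filter_width = 2
  const int stride = 2;
  const int pad = 0;
  const int input_width = static_cast<int>(input.size());
  const int filter_width = static_cast<int>(filter.size());
  const int output_width = (input_width - 1) * stride + filter_width - 2 * pad;
  std::vector<int> output(output_width, 0);  // plays the role of scratch_buffer

  for (int in_x = 0; in_x < input_width; ++in_x) {
    const int out_x_origin = in_x * stride - pad;
    for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
      const int out_x = out_x_origin + filter_x;
      if (out_x >= 0 && out_x < output_width) {
        // Scatter: one input element contributes to several output elements.
        output[out_x] += input[in_x] * filter[filter_x];
      }
    }
  }
  for (int v : output) std::printf("%d ", v);  // prints: 1 1 2 2 3 3
  std::printf("\n");
  return 0;
}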

View File

@@ -1,90 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
#include <algorithm>
#include <cmath>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
const RuntimeShape& input_shape,
const float* input_data,
const RuntimeShape& output_shape,
float* output_data, float epsilon = 1e-6) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
float squared_l2_norm = 0;
for (int c = 0; c < depth; ++c) {
const float val = input_data[depth * i + c];
squared_l2_norm += val * val;
}
float l2_norm = std::sqrt(squared_l2_norm);
l2_norm = std::max(l2_norm, epsilon);
for (int c = 0; c < depth; ++c) {
output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
}
}
}
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
const RuntimeShape& input_shape,
const uint8_t* input_data,
const RuntimeShape& output_shape,
uint8_t* output_data) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int32_t input_zero_point = op_params.input_zero_point;
for (int i = 0; i < outer_size; ++i) {
int32_t square_l2_norm = 0;
for (int c = 0; c < depth; c++) {
int32_t diff = input_data[depth * i + c] - input_zero_point;
square_l2_norm += diff * diff;
}
int32_t inv_l2norm_multiplier;
int inv_l2norm_shift;
GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
&inv_l2norm_multiplier, &inv_l2norm_shift);
for (int c = 0; c < depth; c++) {
int32_t diff = input_data[depth * i + c] - input_zero_point;
int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
int32_t unclamped_output_val = 128 + rescaled_diff;
int32_t output_val =
std::min(static_cast<int32_t>(255),
std::max(static_cast<int32_t>(0), unclamped_output_val));
output_data[depth * i + c] = static_cast<uint8_t>(output_val);
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
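
For the float L2Normalization above, each trailing-dimension slice is divided by max(sqrt(sum of squares), epsilon). A minimal call sketch follows; the include path is inferred from the header guard and the main() wrapper is illustrative.

#include <cstdio>

// Include path inferred from the header guard above (assumption).
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"

int main() {
  tflite::L2NormalizationParams op_params{};  // zero point is unused on the float path
  const tflite::RuntimeShape shape({1, 3});
  const float input[3] = {3.0f, 0.0f, 4.0f};  // L2 norm = 5
  float output[3] = {};
  tflite::reference_ops::L2Normalization(op_params, shape, input, shape, output);
  std::printf("%g %g %g\n", output[0], output[1], output[2]);  // 0.6 0 0.8
  return 0;
}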

View File

@@ -1,69 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
inline void LeakyRelu(const tflite::LeakyReluParams& params,
const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
// Note that alpha might be > 1 or < 0, so we don't use std::max here.
output_data[i] = val > 0 ? val : val * params.alpha;
}
}
template <typename T>
inline void QuantizeLeakyRelu(const LeakyReluParams& params,
const RuntimeShape& input_shape,
const T* input_data,
const RuntimeShape& output_shape,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
static const int32_t quantized_min = std::numeric_limits<T>::min();
static const int32_t quantized_max = std::numeric_limits<T>::max();
for (int i = 0; i < flat_size; ++i) {
const int32_t input_value = input_data[i] - params.input_offset;
int32_t unclamped_output;
if (input_value >= 0) {
unclamped_output = params.output_offset +
MultiplyByQuantizedMultiplier(
input_value, params.output_multiplier_identity,
params.output_shift_identity);
} else {
unclamped_output = params.output_offset +
MultiplyByQuantizedMultiplier(
input_value, params.output_multiplier_alpha,
params.output_shift_alpha);
}
const T clamped_output =
std::min(quantized_max, std::max(quantized_min, unclamped_output));
output_data[i] = static_cast<T>(clamped_output);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_
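
The float LeakyRelu above deliberately avoids std::max because alpha may be negative or greater than 1. The hypothetical call below makes that visible by using a negative alpha; the include path is inferred from the header guard and the main() wrapper is illustrative.

#include <cstdio>

// Include path inferred from the header guard above (assumption).
#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"

int main() {
  tflite::LeakyReluParams params{};
  params.alpha = -0.5f;  // negative slope: negative inputs flip sign
  const tflite::RuntimeShape shape({1, 4});
  const float input[4] = {-2.0f, -1.0f, 0.5f, 3.0f};
  float output[4] = {};
  tflite::reference_ops::LeakyRelu(params, shape, input, shape, output);
  std::printf("%g %g %g %g\n", output[0], output[1], output[2], output[3]);  // 1 0.5 0.5 3
  return 0;
}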

View File

@@ -1,256 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
#include <algorithm>
#include <cstddef>
#include <limits>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
inline void LogSoftmax(const SoftmaxParams& params,
const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
// Find max element value which we'll use to ensure numerical stability
// taking advantage of the following equality:
// log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
float max = std::numeric_limits<float>::lowest();
for (int c = 0; c < depth; ++c) {
max = std::max(max, input_data[i * depth + c]);
}
// Compute sum.
float sum = 0.f;
for (int c = 0; c < depth; ++c) {
sum += std::exp(input_data[i * depth + c] - max);
}
// Compute result.
const float log_sum = std::log(sum);
for (int c = 0; c < depth; ++c) {
output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
}
}
}
inline void LogSoftmax(const SoftmaxParams& params,
const RuntimeShape& input_shape,
const uint8_t* input_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
const int32_t input_multiplier = params.input_multiplier;
const int32_t input_left_shift = params.input_left_shift;
const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
const int32_t reverse_scaling_right_shift =
params.reverse_scaling_right_shift;
const int diff_min = params.diff_min;
// The representation chosen for the input to the exp() function is Q5.26.
// We need to leave extra space since values that we skip might be as large
// as -32 before multiplying by input_multiplier, and therefore as
// large as -16 afterwards. Note that exp(-8) is definitely not
// insignificant to accumulation, but exp(-16) definitely is.
static constexpr int kScaledDiffIntegerBits = 5;
static constexpr int kAccumulationIntegerBits = 12;
static constexpr int kOutputIntegerBits = 4;
using FixedPointScaledDiff =
gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
using FixedPointAccum =
gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
uint8_t max_in_row = 0;
for (int c = 0; c < depth; ++c) {
max_in_row = std::max(max_in_row, input_data[i * depth + c]);
}
FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
for (int c = 0; c < depth; ++c) {
int32_t input_diff =
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
if (input_diff >= diff_min) {
const int32_t input_diff_rescaled =
MultiplyByQuantizedMultiplierGreaterThanOne(
input_diff, input_multiplier, input_left_shift);
const FixedPointScaledDiff scaled_diff_f8 =
FixedPointScaledDiff::FromRaw(input_diff_rescaled);
sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
exp_on_negative_values(scaled_diff_f8));
}
}
const int32_t fixed_log_sum_of_exps =
log_x_for_x_greater_than_or_equal_to_1<kScaledDiffIntegerBits>(
sum_of_exps)
.raw();
// rescaled_diff_min is the smallest value representable in
// Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the
// log-sum-of-exps that will be subtracted in the loop.
//
// The thresholds diff_min, etc. are negative.
const int rescaled_diff_min =
fixed_log_sum_of_exps + std::numeric_limits<int32_t>::lowest();
const int adjusted_diff_min =
std::max(static_cast<int32_t>(
diff_min - 1), // Note use of > below instead of >= above.
MultiplyByQuantizedMultiplierSmallerThanOneExp(
rescaled_diff_min, reverse_scaling_divisor,
-reverse_scaling_right_shift));
for (int c = 0; c < depth; ++c) {
int32_t input_diff =
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
if (input_diff > adjusted_diff_min) {
const int32_t input_diff_rescaled =
MultiplyByQuantizedMultiplierGreaterThanOne(
input_diff, input_multiplier, input_left_shift);
int32_t unsat_output =
gemmlowp::RoundingDivideByPOT(
(input_diff_rescaled - fixed_log_sum_of_exps),
31 - kScaledDiffIntegerBits - kOutputIntegerBits) +
255;
output_data[i * depth + c] = static_cast<uint8_t>(
std::max(std::min(unsat_output, static_cast<int32_t>(255)),
static_cast<int32_t>(0)));
} else {
// Set output to smallest value.
output_data[i * depth + c] = 0;
}
}
}
}
template <typename T>
inline void LogSoftmaxQuantized(const SoftmaxParams& params,
const size_t outer_size, const size_t depth,
const RuntimeShape& input_shape,
const T* input_data,
const RuntimeShape& output_shape,
T* output_data) {
const int32_t input_multiplier = params.input_multiplier;
const int32_t input_left_shift = params.input_left_shift;
const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
const int32_t reverse_scaling_right_shift =
params.reverse_scaling_right_shift;
const int diff_min = params.diff_min;
static constexpr T kMinT8 = std::numeric_limits<T>::min();
static constexpr T kMaxT8 = std::numeric_limits<T>::max();
static constexpr int32_t kMinInt32 = std::numeric_limits<int32_t>::min();
// All IntegerBits must agree with Prepare function.
// Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible.
static constexpr int kInputIntegerBits = 5;
static constexpr int kAccumulationIntegerBits = 12;
static constexpr int kOutputIntegerBits = 4;
using F5 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
using F12 = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
for (size_t outer_index = 0; outer_index < outer_size; ++outer_index) {
T max_in_row = kMinT8;
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
max_in_row =
std::max(max_in_row, input_data[outer_index * depth + inner_index]);
}
// Accumulator "sum_of_exps_in_q12" is safe from overflowing in 2^12 steps.
F12 sum_of_exps_in_q12 = F12::FromRaw(0);
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input_diff =
static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
max_in_row;
if (input_diff >= diff_min) {
const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
input_diff, input_multiplier, input_left_shift);
sum_of_exps_in_q12 =
sum_of_exps_in_q12 +
gemmlowp::Rescale<kAccumulationIntegerBits>(
exp_on_negative_values(F5::FromRaw(input_diff_in_q5)));
}
}
const int32_t log_sum_of_exps_in_q5 =
log_x_for_x_greater_than_or_equal_to_1<kInputIntegerBits>(
sum_of_exps_in_q12)
.raw();
// This potentially reduces the valid range: shifted_log_sum_of_exps_in_q5 is
// the smallest value representable in Q5.26 plus the log_sum_of_exps.
const int32_t shifted_log_sum_of_exps_in_q5 =
log_sum_of_exps_in_q5 + kMinInt32;
const int32_t adjusted_diff_min =
std::max(static_cast<int32_t>(diff_min - 1),
MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5,
reverse_scaling_divisor,
-reverse_scaling_right_shift));
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input_diff =
static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
max_in_row;
// Note use of > below instead of >= above.
if (input_diff > adjusted_diff_min) {
const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
input_diff, input_multiplier, input_left_shift);
// Rescale and downcast.
int32_t output_in_q27 =
gemmlowp::RoundingDivideByPOT(
(input_diff_in_q5 - log_sum_of_exps_in_q5),
31 - kInputIntegerBits - kOutputIntegerBits) +
kMaxT8;
output_in_q27 =
std::max(std::min(output_in_q27, static_cast<int32_t>(kMaxT8)),
static_cast<int32_t>(kMinT8));
output_data[outer_index * depth + inner_index] =
static_cast<T>(output_in_q27);
} else {
output_data[outer_index * depth + inner_index] = kMinT8;
}
}
}
}
inline void LogSoftmax(const SoftmaxParams& params, const size_t outer_size,
const size_t depth, const RuntimeShape& input_shape,
const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
LogSoftmaxQuantized(params, outer_size, depth, input_shape, input_data,
output_shape, output_data);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
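
The float LogSoftmax above subtracts the per-row maximum before exponentiating; the result is unchanged, since log(exp(x_i)/sum(exp(x_j))) equals (x_i - max) - log(sum(exp(x_j - max))), but overflow is avoided. A standalone numeric illustration (plain C++, no TFLite dependencies):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float x[3] = {1000.0f, 1001.0f, 1002.0f};  // exp(1000.0f) would overflow
  const float max = *std::max_element(x, x + 3);
  float sum = 0.0f;
  for (float v : x) sum += std::exp(v - max);  // finite: exp(-2)+exp(-1)+exp(0)
  const float log_sum = std::log(sum);
  for (float v : x) std::printf("%f ", v - max - log_sum);  // ~ -2.408 -1.408 -0.408
  std::printf("\n");
  return 0;
}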

Some files were not shown because too many files have changed in this diff.