diff --git a/code/components/tflite-lib/tensorflow/lite/builtin_op_data.h b/code/components/tflite-lib/tensorflow/lite/builtin_op_data.h deleted file mode 100644 index b9d42845..00000000 --- a/code/components/tflite-lib/tensorflow/lite/builtin_op_data.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// Compatibility shim for new location of interface definitions. - -#ifndef TENSORFLOW_LITE_BUILTIN_OP_DATA_H_ -#define TENSORFLOW_LITE_BUILTIN_OP_DATA_H_ - -#include "tensorflow/lite/c/builtin_op_data.h" - -#endif // TENSORFLOW_LITE_BUILTIN_OP_DATA_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/c/builtin_op_data.h b/code/components/tflite-lib/tensorflow/lite/c/builtin_op_data.h deleted file mode 100644 index 7f160972..00000000 --- a/code/components/tflite-lib/tensorflow/lite/c/builtin_op_data.h +++ /dev/null @@ -1,525 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ -#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ - -#include - -#include "tensorflow/lite/c/common.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible -// number of dimensions. -#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8 - -// TODO(aselle): Consider using "if this then that" for testing. - -// Useful placeholder to put in otherwise empty structs to avoid size warnings. -typedef struct { - char dummy; -} EmptyStructPlaceholder; - -// IMPORTANT: All new members of structs must be added at the end to ensure -// backwards compatibility. - -// Possible padding types (for convolutions) -typedef enum { - kTfLitePaddingUnknown = 0, - kTfLitePaddingSame, - kTfLitePaddingValid, -} TfLitePadding; - -typedef enum { - kTfLiteMirrorPaddingUnknown = 0, - kTfLiteMirrorPaddingReflect, - kTfLiteMirrorPaddingSymmetric, -} TfLiteMirrorPaddingMode; - -// TODO(b/130259536): We should move this out of builtin_op_data. -typedef struct { - int width; - int height; - int width_offset; - int height_offset; -} TfLitePaddingValues; - -typedef struct { - TfLiteMirrorPaddingMode mode; -} TfLiteMirrorPaddingParams; - -// Possible fused activation functions. 
-typedef enum { - kTfLiteActNone = 0, - kTfLiteActRelu, - kTfLiteActReluN1To1, // min(max(-1, x), 1) - kTfLiteActRelu6, // min(max(0, x), 6) - kTfLiteActTanh, - kTfLiteActSignBit, - kTfLiteActSigmoid, -} TfLiteFusedActivation; - -typedef struct { - // Parameters for CONV_2D version 1. - TfLitePadding padding; - int stride_width; - int stride_height; - TfLiteFusedActivation activation; - - // Parameters for CONV_2D version 2. - // Note: Version 2 supports dilation values not equal to 1. - int dilation_width_factor; - int dilation_height_factor; -} TfLiteConvParams; - -typedef struct { - TfLitePadding padding; - int stride_width; - int stride_height; - int stride_depth; - int dilation_width_factor; - int dilation_height_factor; - int dilation_depth_factor; - TfLiteFusedActivation activation; -} TfLiteConv3DParams; - -typedef TfLiteConv3DParams TfLiteConv3DTransposeParams; - -typedef struct { - TfLitePadding padding; - int stride_width; - int stride_height; - int filter_width; - int filter_height; - TfLiteFusedActivation activation; - struct { - TfLitePaddingValues padding; - } computed; -} TfLitePoolParams; - -typedef struct { - // Parameters for DepthwiseConv version 1 or above. - TfLitePadding padding; - int stride_width; - int stride_height; - // `depth_multiplier` is redundant. It's used by CPU kernels in - // TensorFlow 2.0 or below, but ignored in versions above. - // - // The information can be deduced from the shape of input and the shape of - // weights. Since the TFLiteConverter toolchain doesn't support partially - // specified shapes, relying on `depth_multiplier` stops us from supporting - // graphs with dynamic shape tensors. - // - // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this - // field. - int depth_multiplier; - TfLiteFusedActivation activation; - // Parameters for DepthwiseConv version 2 or above. 
- int dilation_width_factor; - int dilation_height_factor; -} TfLiteDepthwiseConvParams; - -typedef struct { - int rank; - TfLiteFusedActivation activation; - - // Parameter for SVDF version 4. - bool asymmetric_quantize_inputs; -} TfLiteSVDFParams; - -typedef struct { - TfLiteFusedActivation activation; - - // Parameter for RNN version 3. - bool asymmetric_quantize_inputs; -} TfLiteRNNParams; - -typedef struct { - bool time_major; - TfLiteFusedActivation activation; - - // Parameter for Sequence RNN version 3. - bool asymmetric_quantize_inputs; -} TfLiteSequenceRNNParams; - -typedef struct { - bool time_major; - TfLiteFusedActivation activation; - bool merge_outputs; - - // Parameter for Bidirectional RNN verison 3. - bool asymmetric_quantize_inputs; -} TfLiteBidirectionalSequenceRNNParams; - -typedef enum { - kTfLiteFullyConnectedWeightsFormatDefault = 0, - kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1, -} TfLiteFullyConnectedWeightsFormat; - -typedef struct { - // Parameters for FullyConnected version 1 or above. - TfLiteFusedActivation activation; - - // Parameters for FullyConnected version 2 or above. - TfLiteFullyConnectedWeightsFormat weights_format; - - // Parameters for FullyConnected version 5 or above. - // If set to true, then the number of dimensions in the input and the output - // tensors are the same. Furthermore, all but the last dimension of the input - // and output shapes will be equal. - bool keep_num_dims; - - // Parameters for FullyConnected version 7 or above. - // If set to true and the weights are quantized, then non constant inputs - // are quantized at evaluation time with asymmetric quantization. 
- bool asymmetric_quantize_inputs; -} TfLiteFullyConnectedParams; - -typedef enum { - kTfLiteLshProjectionUnknown = 0, - kTfLiteLshProjectionSparse = 1, - kTfLiteLshProjectionDense = 2, -} TfLiteLSHProjectionType; - -typedef struct { - TfLiteLSHProjectionType type; -} TfLiteLSHProjectionParams; - -typedef struct { - float beta; -} TfLiteSoftmaxParams; - -typedef struct { - int axis; - TfLiteFusedActivation activation; -} TfLiteConcatenationParams; - -typedef struct { - TfLiteFusedActivation activation; - // Parameter added for the version 4. - bool pot_scale_int16; -} TfLiteAddParams; - -typedef struct { - EmptyStructPlaceholder placeholder; -} TfLiteSpaceToBatchNDParams; - -typedef struct { - EmptyStructPlaceholder placeholder; -} TfLiteBatchToSpaceNDParams; - -typedef struct { - bool adj_x; - bool adj_y; - // Parameters for BatchMatMul version 4 or above. - // If set to true and the weights are quantized, then non constant inputs - // are quantized at evaluation time with asymmetric quantization. - bool asymmetric_quantize_inputs; -} TfLiteBatchMatMulParams; - -typedef struct { - TfLiteFusedActivation activation; -} TfLiteMulParams; - -typedef struct { - TfLiteFusedActivation activation; - // Parameter added for the version 5. - bool pot_scale_int16; -} TfLiteSubParams; - -typedef struct { - TfLiteFusedActivation activation; -} TfLiteDivParams; - -typedef struct { - TfLiteFusedActivation activation; -} TfLiteL2NormParams; - -typedef struct { - int radius; - float bias; - float alpha; - float beta; -} TfLiteLocalResponseNormParams; - -typedef enum { - kTfLiteLSTMFullKernel = 0, - kTfLiteLSTMBasicKernel -} TfLiteLSTMKernelType; - -typedef struct { - // Parameters for LSTM version 1. - TfLiteFusedActivation activation; - float cell_clip; - float proj_clip; - - // Parameters for LSTM version 2. - // kTfLiteLSTMBasicKernel is only supported in version 2 or above. - TfLiteLSTMKernelType kernel_type; - - // Parameters for LSTM version 4. 
- bool asymmetric_quantize_inputs; -} TfLiteLSTMParams; - -typedef struct { - // Parameters needed for the underlying LSTM. - TfLiteFusedActivation activation; - float cell_clip; - float proj_clip; - - // If set to true then the first dimension is time, otherwise batch. - bool time_major; - - // Parameter for unidirectional sequence RNN version 3. - bool asymmetric_quantize_inputs; -} TfLiteUnidirectionalSequenceLSTMParams; - -typedef struct { - // Parameters supported by version 1: - // Parameters inherited for the LSTM kernel. - TfLiteFusedActivation activation; - float cell_clip; - float proj_clip; - - // If true, store the outputs of both directions in the first output. - bool merge_outputs; - - // Parameters supported by version 2: - // If set to true then the first dimension is time, otherwise batch. - bool time_major; - - // Parameters supported by version 4: - // If set to true, then hybrid ops use asymmetric quantization for inputs. - bool asymmetric_quantize_inputs; -} TfLiteBidirectionalSequenceLSTMParams; - -typedef struct { - bool align_corners; - // half_pixel_centers assumes pixels are of half the actual dimensions, and - // yields more accurate resizes. Corresponds to the same argument for the - // original TensorFlow op in TF2.0. - bool half_pixel_centers; -} TfLiteResizeBilinearParams; - -typedef struct { - bool align_corners; - bool half_pixel_centers; -} TfLiteResizeNearestNeighborParams; - -typedef struct { - EmptyStructPlaceholder placeholder; -} TfLitePadParams; - -typedef struct { - EmptyStructPlaceholder placeholder; -} TfLitePadV2Params; - -typedef struct { - // These fields are only used in old models for backward compatibility. - // In the current implementation, we use the 2nd input of the op as the shape, - // and these fields are unused. 
- int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT]; - int num_dimensions; -} TfLiteReshapeParams; - -typedef struct { - int ngram_size; - int max_skip_size; - bool include_all_ngrams; -} TfLiteSkipGramParams; - -typedef struct { - int block_size; -} TfLiteSpaceToDepthParams; - -typedef struct { - int block_size; -} TfLiteDepthToSpaceParams; - -typedef struct { - TfLiteType in_data_type; - TfLiteType out_data_type; -} TfLiteCastParams; - -typedef enum { - kTfLiteCombinerTypeSum = 0, - kTfLiteCombinerTypeMean = 1, - kTfLiteCombinerTypeSqrtn = 2, -} TfLiteCombinerType; - -typedef struct { - TfLiteCombinerType combiner; -} TfLiteEmbeddingLookupSparseParams; - -typedef struct { - int axis; - int batch_dims; -} TfLiteGatherParams; - -typedef struct { - EmptyStructPlaceholder placeholder; -} TfLiteTransposeParams; - -typedef struct { - bool keep_dims; -} TfLiteReducerParams; - -typedef struct { - int num_splits; -} TfLiteSplitParams; - -typedef struct { - int num_splits; -} TfLiteSplitVParams; - -typedef struct { - // TODO(ahentz): We can't have dynamic data in this struct, at least not yet. - // For now we will fix the maximum possible number of dimensions. 
- int squeeze_dims[8]; - int num_squeeze_dims; -} TfLiteSqueezeParams; - -typedef struct { - int begin_mask; - int end_mask; - int ellipsis_mask; - int new_axis_mask; - int shrink_axis_mask; -} TfLiteStridedSliceParams; - -typedef struct { - TfLiteType output_type; -} TfLiteArgMaxParams; - -typedef struct { - TfLiteType output_type; -} TfLiteArgMinParams; - -typedef struct { - TfLitePadding padding; - int stride_width; - int stride_height; -} TfLiteTransposeConvParams; - -typedef struct { - bool validate_indices; -} TfLiteSparseToDenseParams; - -typedef struct { - TfLiteType out_type; -} TfLiteShapeParams; - -typedef struct { - EmptyStructPlaceholder placeholder; -} TfLiteRankParams; - -typedef struct { - // Parameters supported by version 1: - float min; - float max; - int num_bits; - - // Parameters supported by version 2: - bool narrow_range; -} TfLiteFakeQuantParams; - -typedef struct { - int values_count; - int axis; -} TfLitePackParams; - -typedef struct { - int axis; -} TfLiteOneHotParams; - -typedef struct { - int num; - int axis; -} TfLiteUnpackParams; - -typedef struct { - float alpha; -} TfLiteLeakyReluParams; - -typedef struct { - TfLiteType index_out_type; -} TfLiteUniqueParams; - -typedef struct { - int seq_dim; - int batch_dim; -} TfLiteReverseSequenceParams; - -typedef struct { - EmptyStructPlaceholder placeholder; -} TfLiteMatrixDiagParams; - -typedef struct { - EmptyStructPlaceholder placeholder; -} TfLiteMatrixSetDiagParams; - -typedef struct { - int then_subgraph_index; - int else_subgraph_index; -} TfLiteIfParams; - -typedef struct { - int cond_subgraph_index; - int body_subgraph_index; -} TfLiteWhileParams; - -typedef struct { - bool exclusive; - bool reverse; -} TfLiteCumsumParams; - -typedef struct { - int init_subgraph_index; -} TfLiteCallOnceParams; - -typedef struct { - int table_id; - TfLiteType key_dtype; - TfLiteType value_dtype; -} TfLiteHashtableParams; - -typedef struct { - const char* container; - const char* shared_name; -} 
TfLiteVarHandleParams; - -typedef struct { - int seed; - int seed2; -} TfLiteRandomParams; - -typedef struct { - int num_boundaries; - // This points to the memory stored in the model (flatbuffer), - // and is not owned. - const float* boundaries; -} TfLiteBucketizeParams; - -typedef struct { - bool approximate; -} TfLiteGeluParams; - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/c/c_api_types.h b/code/components/tflite-lib/tensorflow/lite/c/c_api_types.h deleted file mode 100644 index d947213b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/c/c_api_types.h +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This file declares types used by the pure C inference API defined in c_api.h, -// some of which are also used in the C++ and C kernel and interpreter APIs. - -#ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_ -#define TENSORFLOW_LITE_C_C_API_TYPES_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// Define TFL_CAPI_EXPORT macro to export a function properly with a shared -// library. 
-#ifdef SWIG -#define TFL_CAPI_EXPORT -#elif defined(TFL_STATIC_LIBRARY_BUILD) -#define TFL_CAPI_EXPORT -#else // not definded TFL_STATIC_LIBRARY_BUILD -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - -// Note that new error status values may be added in future in order to -// indicate more fine-grained internal states, therefore, applications should -// not rely on status values being members of the enum. -typedef enum TfLiteStatus { - kTfLiteOk = 0, - - // Generally referring to an error in the runtime (i.e. interpreter) - kTfLiteError = 1, - - // Generally referring to an error from a TfLiteDelegate itself. - kTfLiteDelegateError = 2, - - // Generally referring to an error in applying a delegate due to - // incompatibility between runtime and delegate, e.g., this error is returned - // when trying to apply a TF Lite delegate onto a model graph that's already - // immutable. - kTfLiteApplicationError = 3, - - // Generally referring to serialized delegate data not being found. - // See tflite::delegates::Serialization. - kTfLiteDelegateDataNotFound = 4, - - // Generally referring to data-writing issues in delegate serialization. - // See tflite::delegates::Serialization. - kTfLiteDelegateDataWriteError = 5, - - // Generally referring to data-reading issues in delegate serialization. - // See tflite::delegates::Serialization. - kTfLiteDelegateDataReadError = 6, - - // Generally referring to issues when the TF Lite model has ops that cannot be - // resolved at runtime. This could happen when the specific op is not - // registered or built with the TF Lite framework. 
- kTfLiteUnresolvedOps = 7, -} TfLiteStatus; - -// Types supported by tensor -typedef enum { - kTfLiteNoType = 0, - kTfLiteFloat32 = 1, - kTfLiteInt32 = 2, - kTfLiteUInt8 = 3, - kTfLiteInt64 = 4, - kTfLiteString = 5, - kTfLiteBool = 6, - kTfLiteInt16 = 7, - kTfLiteComplex64 = 8, - kTfLiteInt8 = 9, - kTfLiteFloat16 = 10, - kTfLiteFloat64 = 11, - kTfLiteComplex128 = 12, - kTfLiteUInt64 = 13, - kTfLiteResource = 14, - kTfLiteVariant = 15, - kTfLiteUInt32 = 16, - kTfLiteUInt16 = 17, -} TfLiteType; - -// Legacy. Will be deprecated in favor of TfLiteAffineQuantization. -// If per-layer quantization is specified this field will still be populated in -// addition to TfLiteAffineQuantization. -// Parameters for asymmetric quantization. Quantized values can be converted -// back to float using: -// real_value = scale * (quantized_value - zero_point) -typedef struct TfLiteQuantizationParams { - float scale; - int32_t zero_point; -} TfLiteQuantizationParams; - -// -------------------------------------------------------------------------- -// Opaque types used by c_api.h, c_api_opaque.h and common.h. - -// TfLiteOpaqueContext is an opaque version of TfLiteContext; -typedef struct TfLiteOpaqueContext TfLiteOpaqueContext; - -// TfLiteOpaqueNode is an opaque version of TfLiteNode; -typedef struct TfLiteOpaqueNode TfLiteOpaqueNode; - -// TfLiteOpaqueTensor is an opaque version of TfLiteTensor; -typedef struct TfLiteOpaqueTensor TfLiteOpaqueTensor; - -#ifdef __cplusplus -} // extern C -#endif -#endif // TENSORFLOW_LITE_C_C_API_TYPES_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.cc b/code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.cc deleted file mode 100644 index 7070eaa5..00000000 --- a/code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.cc +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/core/api/error_reporter.h" -#include - -namespace tflite { - -int ErrorReporter::Report(const char* format, ...) { - va_list args; - va_start(args, format); - int code = Report(format, args); - va_end(args); - return code; -} - -// TODO(aselle): Make the name of ReportError on context the same, so -// we can use the ensure functions w/o a context and w/ a reporter. -int ErrorReporter::ReportError(void*, const char* format, ...) { - va_list args; - va_start(args, format); - int code = Report(format, args); - va_end(args); - return code; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.h b/code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.h deleted file mode 100644 index 05839a61..00000000 --- a/code/components/tflite-lib/tensorflow/lite/core/api/error_reporter.h +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_ -#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_ - -#include - -namespace tflite { - -/// A functor that reports error to supporting system. Invoked similar to -/// printf. -/// -/// Usage: -/// ErrorReporter foo; -/// foo.Report("test %d", 5); -/// or -/// va_list args; -/// foo.Report("test %d", args); // where args is va_list -/// -/// Subclass ErrorReporter to provide another reporting destination. -/// For example, if you have a GUI program, you might redirect to a buffer -/// that drives a GUI error log box. -class ErrorReporter { - public: - virtual ~ErrorReporter() {} - virtual int Report(const char* format, va_list args) = 0; - int Report(const char* format, ...); - int ReportError(void*, const char* format, ...); -}; - -} // namespace tflite - -// You should not make bare calls to the error reporter, instead use the -// TF_LITE_REPORT_ERROR macro, since this allows message strings to be -// stripped when the binary size has to be optimized. If you are looking to -// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and -// every call will be stubbed out, taking no memory. -#ifndef TF_LITE_STRIP_ERROR_STRINGS -#define TF_LITE_REPORT_ERROR(reporter, ...) \ - do { \ - static_cast(reporter)->Report(__VA_ARGS__); \ - } while (false) -#else // TF_LITE_STRIP_ERROR_STRINGS -#define TF_LITE_REPORT_ERROR(reporter, ...) 
-#endif // TF_LITE_STRIP_ERROR_STRINGS - -#endif // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.cc b/code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.cc deleted file mode 100644 index 6f7e4c2a..00000000 --- a/code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/core/api/op_resolver.h" - -#include "flatbuffers/flatbuffers.h" // from @flatbuffers -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/schema/schema_utils.h" - -namespace tflite { - -TfLiteStatus GetRegistrationFromOpCode( - const OperatorCode* opcode, const OpResolver& op_resolver, - ErrorReporter* error_reporter, const TfLiteRegistration** registration) { - TfLiteStatus status = kTfLiteOk; - *registration = nullptr; - auto builtin_code = GetBuiltinCode(opcode); - int version = opcode->version(); - - if (builtin_code > BuiltinOperator_MAX) { - TF_LITE_REPORT_ERROR( - error_reporter, - "Op builtin_code out of range: %d. 
Are you using old TFLite binary " - "with newer model?", - builtin_code); - status = kTfLiteError; - } else if (builtin_code != BuiltinOperator_CUSTOM) { - *registration = op_resolver.FindOp(builtin_code, version); - if (*registration == nullptr) { - TF_LITE_REPORT_ERROR( - error_reporter, - "Didn't find op for builtin opcode '%s' version '%d'. " - "An older version of this builtin might be supported. " - "Are you using an old TFLite binary with a newer model?\n", - EnumNameBuiltinOperator(builtin_code), version); - status = kTfLiteError; - } - } else if (!opcode->custom_code()) { - TF_LITE_REPORT_ERROR( - error_reporter, - "Operator with CUSTOM builtin_code has no custom_code.\n"); - status = kTfLiteError; - } else { - const char* name = opcode->custom_code()->c_str(); - *registration = op_resolver.FindOp(name, version); - if (*registration == nullptr) { - // Do not report error for unresolved custom op, we do the final check - // while preparing ops. - status = kTfLiteError; - } - } - return status; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.h b/code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.h deleted file mode 100644 index cec1f2dd..00000000 --- a/code/components/tflite-lib/tensorflow/lite/core/api/op_resolver.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_ -#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_ - -#include -#include -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/schema/schema_generated.h" - -// Opaque type similar to TfLiteDelegate / TfLiteOpaqueDelegate. -// This is used for cases (e.g. when using "TF Lite with Google Play Services") -// where the TF Lite runtime might be built using a newer (or older) -// version of the TF Lite sources than the app, and hence might have a -// different definition of the TfLiteDelegate type. TF Lite APIs use -// TfLiteOpaqueDelegate rather than TfLiteDelegate when they want to -// refer to a delegate defined with that potentially different version -// of the TfLiteDelegate type. -struct TfLiteOpaqueDelegateStruct; - -namespace tflite { - -/// Abstract interface that returns TfLiteRegistrations given op codes or custom -/// op names. This is the mechanism that ops being referenced in the flatbuffer -/// model are mapped to executable function pointers (TfLiteRegistrations). -class OpResolver { - public: - /// Finds the op registration for a builtin operator by enum code. - virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op, - int version) const = 0; - /// Finds the op registration of a custom operator by op name. - virtual const TfLiteRegistration* FindOp(const char* op, - int version) const = 0; - - // Represents a sequence of delegates. - using TfLiteDelegatePtrVector = - std::vector>; - - // Returns optional delegates for resolving and handling ops in the flatbuffer - // model. This may be used in addition to the standard TfLiteRegistration - // lookup for graph resolution. - // WARNING: This API is deprecated, GetDelegateCreators is preferred. 
- virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const { - return {}; - } - - // Represents a function that creates a TfLite delegate instance. - using TfLiteDelegateCreator = - std::function( - int /*num_threads*/)>; - - // Represents a sequence of delegate creator functions. - using TfLiteDelegateCreators = std::vector; - - // Returns a vector of delegate creators to create optional delegates for - // resolving and handling ops in the flatbuffer model. This may be used in - // addition to the standard TfLiteRegistration lookup for graph resolution. - // - // Note that this method is not used (will not be called) if you are using - // TF Lite in Google Play Services; the GetOpaqueDelegateCreators method - // (see below) is used for that case. - virtual TfLiteDelegateCreators GetDelegateCreators() const { return {}; } - - // TODO(b/202712825): it would be nice if we could avoid the need for separate - // "opaque" types & methods for use only with TF Lite in Google Play Services. - - // Represents an opaque delegate instance. - // WARNING: Experimental interface, subject to change. - using TfLiteOpaqueDelegatePtr = - std::unique_ptr; - - // Represents a function that creates an opaque delegate instance. - // WARNING: Experimental interface, subject to change. - using TfLiteOpaqueDelegateCreator = - std::function; - - // Represents a sequence of opaque delegate creator functions. - // WARNING: Experimental interface, subject to change. - using TfLiteOpaqueDelegateCreators = std::vector; - - // Returns a vector of opaque delegate creators to create optional opaque - // delegates for resolving and handling ops in the flatbuffer model. This may - // be used in addition to the standard TfLiteRegistration lookup for graph - // resolution. - // - // Note that this method will be called only if you are using TF Lite in - // Google Play Services; if you are using regular TF Lite, GetDelegateCreators - // (see above) is used instead. 
- // - // WARNING: Experimental interface, subject to change. - virtual TfLiteOpaqueDelegateCreators GetOpaqueDelegateCreators() const { - return {}; - } - - virtual ~OpResolver() {} - - private: - /// Returns true if this OpResolver may contain any "user defined" ops. - /// By "user defined" ops, we mean any op definitions other than those - /// contained in tflite::ops::builtin::BuiltinOpResolver. - /// - /// If this method returns true, it doesn't necessarily mean that the - /// OpResolver contains a user-defined op, just that the absence of - /// user-defined ops can't be guaranteed. - /// - /// Note that "user-defined" ops are not the same as "custom" ops; - /// BuiltinOpResolver may support certain "custom" ops, in addition to - /// "builtin" ops, and may not support all of the "builtin" op enum values. - virtual bool MayContainUserDefinedOps() const { return true; } - - friend class OpResolverInternal; -}; - -// Handles the logic for converting between an OperatorCode structure extracted -// from a flatbuffer and information about a registered operator -// implementation. -TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode, - const OpResolver& op_resolver, - ErrorReporter* error_reporter, - const TfLiteRegistration** registration); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.cc b/code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.cc deleted file mode 100644 index 3aac16b6..00000000 --- a/code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/core/api/tensor_utils.h" - -#include - -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) { - if (!tensor->is_variable) { - return kTfLiteOk; - } - // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it - // to the value of the buffer. - int value = 0; - if (tensor->type == kTfLiteInt8) { - value = tensor->params.zero_point; - } - // TODO(b/139446230): Provide a platform header to better handle these - // specific scenarios. -#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \ - defined(__i386) || defined(__x86__) || defined(__X86__) || \ - defined(_X86_) || defined(_M_IX86) || defined(_M_X64) - memset(tensor->data.raw, value, tensor->bytes); -#else - char* raw_ptr = tensor->data.raw; - for (size_t i = 0; i < tensor->bytes; ++i) { - *raw_ptr = value; - raw_ptr++; - } -#endif - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.h b/code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.h deleted file mode 100644 index 9f1cf94a..00000000 --- a/code/components/tflite-lib/tensorflow/lite/core/api/tensor_utils.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_ -#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_ - -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -// Resets a variable tensor to the default value. -TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/bits.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/bits.h deleted file mode 100644 index 04b3ba6f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/bits.h +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_ - -#ifdef __cplusplus -#include - -extern "C" { -#endif - -static inline int CountLeadingZeros32Slow(uint64_t n) { - int zeroes = 28; - if (n >> 16) zeroes -= 16, n >>= 16; - if (n >> 8) zeroes -= 8, n >>= 8; - if (n >> 4) zeroes -= 4, n >>= 4; - return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes; -} - -static inline int CountLeadingZeros32(uint32_t n) { -#if defined(_MSC_VER) - unsigned long result = 0; // NOLINT(runtime/int) - if (_BitScanReverse(&result, n)) { - return 31 - result; - } - return 32; -#elif defined(__GNUC__) - - // Handle 0 as a special case because __builtin_clz(0) is undefined. - if (n == 0) { - return 32; - } - return __builtin_clz(n); -#else - return CountLeadingZeros32Slow(n); -#endif -} - -static inline int MostSignificantBit32(uint32_t n) { - return 32 - CountLeadingZeros32(n); -} - -static inline int CountLeadingZeros64Slow(uint64_t n) { - int zeroes = 60; - if (n >> 32) zeroes -= 32, n >>= 32; - if (n >> 16) zeroes -= 16, n >>= 16; - if (n >> 8) zeroes -= 8, n >>= 8; - if (n >> 4) zeroes -= 4, n >>= 4; - return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0"[n] + zeroes; -} - -static inline int CountLeadingZeros64(uint64_t n) { -#if defined(_MSC_VER) && defined(_M_X64) - // MSVC does not have __builtin_clzll. Use _BitScanReverse64. - unsigned long result = 0; // NOLINT(runtime/int) - if (_BitScanReverse64(&result, n)) { - return 63 - result; - } - return 64; -#elif defined(_MSC_VER) - // MSVC does not have __builtin_clzll. 
Compose two calls to _BitScanReverse - unsigned long result = 0; // NOLINT(runtime/int) - if ((n >> 32) && _BitScanReverse(&result, n >> 32)) { - return 31 - result; - } - if (_BitScanReverse(&result, n)) { - return 63 - result; - } - return 64; -#elif defined(__GNUC__) - - // Handle 0 as a special case because __builtin_clzll(0) is undefined. - if (n == 0) { - return 64; - } - return __builtin_clzll(n); -#else - return CountLeadingZeros64Slow(n); -#endif -} - -static inline int MostSignificantBit64(uint64_t n) { - return 64 - CountLeadingZeros64(n); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_BITS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.cc b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.cc deleted file mode 100644 index bcdd9cc0..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.cc +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/fft.h" - -#include - -#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h" - -void FftCompute(struct FftState* state, const int16_t* input, - int input_scale_shift) { - const size_t input_size = state->input_size; - const size_t fft_size = state->fft_size; - - int16_t* fft_input = state->input; - // First, scale the input by the given shift. - size_t i; - for (i = 0; i < input_size; ++i) { - fft_input[i] = static_cast(static_cast(input[i]) - << input_scale_shift); - } - // Zero out whatever else remains in the top part of the input. - for (; i < fft_size; ++i) { - fft_input[i] = 0; - } - - // Apply the FFT. - kissfft_fixed16::kiss_fftr( - reinterpret_cast(state->scratch), - state->input, - reinterpret_cast(state->output)); -} - -void FftInit(struct FftState* state) { - // All the initialization is done in FftPopulateState() -} - -void FftReset(struct FftState* state) { - memset(state->input, 0, state->fft_size * sizeof(*state->input)); - memset(state->output, 0, (state->fft_size / 2 + 1) * sizeof(*state->output)); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.h deleted file mode 100644 index aaffa69d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct complex_int16_t { - int16_t real; - int16_t imag; -}; - -struct FftState { - int16_t* input; - struct complex_int16_t* output; - size_t fft_size; - size_t input_size; - void* scratch; - size_t scratch_size; -}; - -void FftCompute(struct FftState* state, const int16_t* input, - int input_scale_shift); - -void FftInit(struct FftState* state); - -void FftReset(struct FftState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc deleted file mode 100644 index ed3dc8fb..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h" - -#include - -#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h" - -int FftPopulateState(struct FftState* state, size_t input_size) { - state->input_size = input_size; - state->fft_size = 1; - while (state->fft_size < state->input_size) { - state->fft_size <<= 1; - } - - state->input = reinterpret_cast( - malloc(state->fft_size * sizeof(*state->input))); - if (state->input == nullptr) { - fprintf(stderr, "Failed to alloc fft input buffer\n"); - return 0; - } - - state->output = reinterpret_cast( - malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2)); - if (state->output == nullptr) { - fprintf(stderr, "Failed to alloc fft output buffer\n"); - return 0; - } - - // Ask kissfft how much memory it wants. 
- size_t scratch_size = 0; - kissfft_fixed16::kiss_fftr_cfg kfft_cfg = kissfft_fixed16::kiss_fftr_alloc( - state->fft_size, 0, nullptr, &scratch_size); - if (kfft_cfg != nullptr) { - fprintf(stderr, "Kiss memory sizing failed.\n"); - return 0; - } - state->scratch = malloc(scratch_size); - if (state->scratch == nullptr) { - fprintf(stderr, "Failed to alloc fft scratch buffer\n"); - return 0; - } - state->scratch_size = scratch_size; - // Let kissfft configure the scratch space we just allocated - kfft_cfg = kissfft_fixed16::kiss_fftr_alloc(state->fft_size, 0, - state->scratch, &scratch_size); - if (kfft_cfg != state->scratch) { - fprintf(stderr, "Kiss memory preallocation strategy failed.\n"); - return 0; - } - return 1; -} - -void FftFreeStateContents(struct FftState* state) { - free(state->input); - free(state->output); - free(state->scratch); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.h deleted file mode 100644 index 6a471301..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/fft_util.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_ - -#include "tensorflow/lite/experimental/microfrontend/lib/fft.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Prepares and FFT for the given input size. -int FftPopulateState(struct FftState* state, size_t input_size); - -// Frees any allocated buffers. -void FftFreeStateContents(struct FftState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FFT_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.c deleted file mode 100644 index 80f8738f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.c +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h" - -#include - -#include "tensorflow/lite/experimental/microfrontend/lib/bits.h" - -void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state, - struct complex_int16_t* fft_output, - int32_t* energy) { - const int end_index = state->end_index; - int i; - energy += state->start_index; - fft_output += state->start_index; - for (i = state->start_index; i < end_index; ++i) { - const int32_t real = fft_output->real; - const int32_t imag = fft_output->imag; - fft_output++; - const uint32_t mag_squared = (real * real) + (imag * imag); - *energy++ = mag_squared; - } -} - -void FilterbankAccumulateChannels(struct FilterbankState* state, - const int32_t* energy) { - uint64_t* work = state->work; - uint64_t weight_accumulator = 0; - uint64_t unweight_accumulator = 0; - - const int16_t* channel_frequency_starts = state->channel_frequency_starts; - const int16_t* channel_weight_starts = state->channel_weight_starts; - const int16_t* channel_widths = state->channel_widths; - - int num_channels_plus_1 = state->num_channels + 1; - int i; - for (i = 0; i < num_channels_plus_1; ++i) { - const int32_t* magnitudes = energy + *channel_frequency_starts++; - const int16_t* weights = state->weights + *channel_weight_starts; - const int16_t* unweights = state->unweights + *channel_weight_starts++; - const int width = *channel_widths++; - int j; - for (j = 0; j < width; ++j) { - weight_accumulator += *weights++ * ((uint64_t)*magnitudes); - unweight_accumulator += *unweights++ * ((uint64_t)*magnitudes); - ++magnitudes; - } - *work++ = weight_accumulator; - weight_accumulator = unweight_accumulator; - unweight_accumulator = 0; - } -} - -static uint16_t Sqrt32(uint32_t num) { - if (num == 0) { - return 0; - } - uint32_t res = 0; - int max_bit_number = 32 - MostSignificantBit32(num); - max_bit_number |= 1; - uint32_t bit = 1U << 
(31 - max_bit_number); - int iterations = (31 - max_bit_number) / 2 + 1; - while (iterations--) { - if (num >= res + bit) { - num -= res + bit; - res = (res >> 1U) + bit; - } else { - res >>= 1U; - } - bit >>= 2U; - } - // Do rounding - if we have the bits. - if (num > res && res != 0xFFFF) { - ++res; - } - return res; -} - -static uint32_t Sqrt64(uint64_t num) { - // Take a shortcut and just use 32 bit operations if the upper word is all - // clear. This will cause a slight off by one issue for numbers close to 2^32, - // but it probably isn't going to matter (and gives us a big performance win). - if ((num >> 32) == 0) { - return Sqrt32((uint32_t)num); - } - uint64_t res = 0; - int max_bit_number = 64 - MostSignificantBit64(num); - max_bit_number |= 1; - uint64_t bit = 1ULL << (63 - max_bit_number); - int iterations = (63 - max_bit_number) / 2 + 1; - while (iterations--) { - if (num >= res + bit) { - num -= res + bit; - res = (res >> 1U) + bit; - } else { - res >>= 1U; - } - bit >>= 2U; - } - // Do rounding - if we have the bits. - if (num > res && res != 0xFFFFFFFFLL) { - ++res; - } - return res; -} - -uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift) { - const int num_channels = state->num_channels; - const uint64_t* work = state->work + 1; - // Reuse the work buffer since we're fine clobbering it at this point to hold - // the output. 
- uint32_t* output = (uint32_t*)state->work; - int i; - for (i = 0; i < num_channels; ++i) { - *output++ = Sqrt64(*work++) >> scale_down_shift; - } - return (uint32_t*)state->work; -} - -void FilterbankReset(struct FilterbankState* state) { - memset(state->work, 0, (state->num_channels + 1) * sizeof(*state->work)); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.h deleted file mode 100644 index 1e6d3885..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_ - -#include -#include - -#include "tensorflow/lite/experimental/microfrontend/lib/fft.h" - -#define kFilterbankBits 12 - -#ifdef __cplusplus -extern "C" { -#endif - -struct FilterbankState { - int num_channels; - int start_index; - int end_index; - int16_t* channel_frequency_starts; - int16_t* channel_weight_starts; - int16_t* channel_widths; - int16_t* weights; - int16_t* unweights; - uint64_t* work; -}; - -// Converts the relevant complex values of an FFT output into energy (the -// square magnitude). -void FilterbankConvertFftComplexToEnergy(struct FilterbankState* state, - struct complex_int16_t* fft_output, - int32_t* energy); - -// Computes the mel-scale filterbank on the given energy array. Output is cached -// internally - to fetch it, you need to call FilterbankSqrt. -void FilterbankAccumulateChannels(struct FilterbankState* state, - const int32_t* energy); - -// Applies an integer square root to the 64 bit intermediate values of the -// filterbank, and returns a pointer to them. Memory will be invalidated the -// next time FilterbankAccumulateChannels is called. -uint32_t* FilterbankSqrt(struct FilterbankState* state, int scale_down_shift); - -void FilterbankReset(struct FilterbankState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c deleted file mode 100644 index f18ebf54..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.c +++ /dev/null @@ -1,220 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h" - -#include -#include -#include - -#define kFilterbankIndexAlignment 4 -#define kFilterbankChannelBlockSize 4 - -void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config) { - config->num_channels = 32; - config->lower_band_limit = 125.0f; - config->upper_band_limit = 7500.0f; - config->output_scale_shift = 7; -} - -static float FreqToMel(float freq) { return 1127.0 * log1p(freq / 700.0); } - -static void CalculateCenterFrequencies(const int num_channels, - const float lower_frequency_limit, - const float upper_frequency_limit, - float* center_frequencies) { - assert(lower_frequency_limit >= 0.0f); - assert(upper_frequency_limit > lower_frequency_limit); - - const float mel_low = FreqToMel(lower_frequency_limit); - const float mel_hi = FreqToMel(upper_frequency_limit); - const float mel_span = mel_hi - mel_low; - const float mel_spacing = mel_span / ((float)num_channels); - int i; - for (i = 0; i < num_channels; ++i) { - center_frequencies[i] = mel_low + (mel_spacing * (i + 1)); - } -} - -static void QuantizeFilterbankWeights(const float float_weight, int16_t* weight, - int16_t* unweight) { - *weight = floor(float_weight * (1 << kFilterbankBits) + 0.5); - *unweight = floor((1.0 - float_weight) * (1 << kFilterbankBits) + 0.5); -} - -int 
FilterbankPopulateState(const struct FilterbankConfig* config, - struct FilterbankState* state, int sample_rate, - int spectrum_size) { - state->num_channels = config->num_channels; - const int num_channels_plus_1 = config->num_channels + 1; - - // How should we align things to index counts given the byte alignment? - const int index_alignment = - (kFilterbankIndexAlignment < sizeof(int16_t) - ? 1 - : kFilterbankIndexAlignment / sizeof(int16_t)); - - state->channel_frequency_starts = - malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts)); - state->channel_weight_starts = - malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts)); - state->channel_widths = - malloc(num_channels_plus_1 * sizeof(*state->channel_widths)); - state->work = malloc(num_channels_plus_1 * sizeof(*state->work)); - - float* center_mel_freqs = - malloc(num_channels_plus_1 * sizeof(*center_mel_freqs)); - int16_t* actual_channel_starts = - malloc(num_channels_plus_1 * sizeof(*actual_channel_starts)); - int16_t* actual_channel_widths = - malloc(num_channels_plus_1 * sizeof(*actual_channel_widths)); - - if (state->channel_frequency_starts == NULL || - state->channel_weight_starts == NULL || state->channel_widths == NULL || - center_mel_freqs == NULL || actual_channel_starts == NULL || - actual_channel_widths == NULL) { - free(center_mel_freqs); - free(actual_channel_starts); - free(actual_channel_widths); - fprintf(stderr, "Failed to allocate channel buffers\n"); - return 0; - } - - CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit, - config->upper_band_limit, center_mel_freqs); - - // Always exclude DC. - const float hz_per_sbin = 0.5 * sample_rate / ((float)spectrum_size - 1); - state->start_index = 1.5 + config->lower_band_limit / hz_per_sbin; - state->end_index = 0; // Initialized to zero here, but actually set below. 
- - // For each channel, we need to figure out what frequencies belong to it, and - // how much padding we need to add so that we can efficiently multiply the - // weights and unweights for accumulation. To simplify the multiplication - // logic, all channels will have some multiplication to do (even if there are - // no frequencies that accumulate to that channel) - they will be directed to - // a set of zero weights. - int chan_freq_index_start = state->start_index; - int weight_index_start = 0; - int needs_zeros = 0; - - int chan; - for (chan = 0; chan < num_channels_plus_1; ++chan) { - // Keep jumping frequencies until we overshoot the bound on this channel. - int freq_index = chan_freq_index_start; - while (FreqToMel((freq_index)*hz_per_sbin) <= center_mel_freqs[chan]) { - ++freq_index; - } - - const int width = freq_index - chan_freq_index_start; - actual_channel_starts[chan] = chan_freq_index_start; - actual_channel_widths[chan] = width; - - if (width == 0) { - // This channel doesn't actually get anything from the frequencies, it's - // always zero. We need then to insert some 'zero' weights into the - // output, and just redirect this channel to do a single multiplication at - // this point. For simplicity, the zeros are placed at the beginning of - // the weights arrays, so we have to go and update all the other - // weight_starts to reflect this shift (but only once). - state->channel_frequency_starts[chan] = 0; - state->channel_weight_starts[chan] = 0; - state->channel_widths[chan] = kFilterbankChannelBlockSize; - if (!needs_zeros) { - needs_zeros = 1; - int j; - for (j = 0; j < chan; ++j) { - state->channel_weight_starts[j] += kFilterbankChannelBlockSize; - } - weight_index_start += kFilterbankChannelBlockSize; - } - } else { - // How far back do we need to go to ensure that we have the proper - // alignment? 
- const int aligned_start = - (chan_freq_index_start / index_alignment) * index_alignment; - const int aligned_width = (chan_freq_index_start - aligned_start + width); - const int padded_width = - (((aligned_width - 1) / kFilterbankChannelBlockSize) + 1) * - kFilterbankChannelBlockSize; - - state->channel_frequency_starts[chan] = aligned_start; - state->channel_weight_starts[chan] = weight_index_start; - state->channel_widths[chan] = padded_width; - weight_index_start += padded_width; - } - chan_freq_index_start = freq_index; - } - - // Allocate the two arrays to store the weights - weight_index_start contains - // the index of what would be the next set of weights that we would need to - // add, so that's how many weights we need to allocate. - state->weights = calloc(weight_index_start, sizeof(*state->weights)); - state->unweights = calloc(weight_index_start, sizeof(*state->unweights)); - - // If the alloc failed, we also need to nuke the arrays. - if (state->weights == NULL || state->unweights == NULL) { - free(center_mel_freqs); - free(actual_channel_starts); - free(actual_channel_widths); - fprintf(stderr, "Failed to allocate weights or unweights\n"); - return 0; - } - - // Next pass, compute all the weights. Since everything has been memset to - // zero, we only need to fill in the weights that correspond to some frequency - // for a channel. - const float mel_low = FreqToMel(config->lower_band_limit); - for (chan = 0; chan < num_channels_plus_1; ++chan) { - int frequency = actual_channel_starts[chan]; - const int num_frequencies = actual_channel_widths[chan]; - const int frequency_offset = - frequency - state->channel_frequency_starts[chan]; - const int weight_start = state->channel_weight_starts[chan]; - const float denom_val = (chan == 0) ? 
mel_low : center_mel_freqs[chan - 1]; - - int j; - for (j = 0; j < num_frequencies; ++j, ++frequency) { - const float weight = - (center_mel_freqs[chan] - FreqToMel(frequency * hz_per_sbin)) / - (center_mel_freqs[chan] - denom_val); - - // Make the float into an integer for the weights (and unweights). - const int weight_index = weight_start + frequency_offset + j; - QuantizeFilterbankWeights(weight, state->weights + weight_index, - state->unweights + weight_index); - } - if (frequency > state->end_index) { - state->end_index = frequency; - } - } - - free(center_mel_freqs); - free(actual_channel_starts); - free(actual_channel_widths); - if (state->end_index >= spectrum_size) { - fprintf(stderr, "Filterbank end_index is above spectrum size.\n"); - return 0; - } - return 1; -} - -void FilterbankFreeStateContents(struct FilterbankState* state) { - free(state->channel_frequency_starts); - free(state->channel_weight_starts); - free(state->channel_widths); - free(state->weights); - free(state->unweights); - free(state->work); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h deleted file mode 100644 index 781d1024..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_ - -#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct FilterbankConfig { - // number of frequency channel buckets for filterbank - int num_channels; - // maximum frequency to include - float upper_band_limit; - // minimum frequency to include - float lower_band_limit; - // unused - int output_scale_shift; -}; - -// Fills the frontendConfig with "sane" defaults. -void FilterbankFillConfigWithDefaults(struct FilterbankConfig* config); - -// Allocates any buffers. -int FilterbankPopulateState(const struct FilterbankConfig* config, - struct FilterbankState* state, int sample_rate, - int spectrum_size); - -// Frees any allocated buffers. -void FilterbankFreeStateContents(struct FilterbankState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FILTERBANK_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.c deleted file mode 100644 index 9de2a879..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.c +++ /dev/null @@ -1,72 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h" - -#include "tensorflow/lite/experimental/microfrontend/lib/bits.h" - -struct FrontendOutput FrontendProcessSamples(struct FrontendState* state, - const int16_t* samples, - size_t num_samples, - size_t* num_samples_read) { - struct FrontendOutput output; - output.values = NULL; - output.size = 0; - - // Try to apply the window - if it fails, return and wait for more data. - if (!WindowProcessSamples(&state->window, samples, num_samples, - num_samples_read)) { - return output; - } - - // Apply the FFT to the window's output (and scale it so that the fixed point - // FFT can have as much resolution as possible). - int input_shift = - 15 - MostSignificantBit32(state->window.max_abs_output_value); - FftCompute(&state->fft, state->window.output, input_shift); - - // We can re-ruse the fft's output buffer to hold the energy. - int32_t* energy = (int32_t*)state->fft.output; - - FilterbankConvertFftComplexToEnergy(&state->filterbank, state->fft.output, - energy); - - FilterbankAccumulateChannels(&state->filterbank, energy); - uint32_t* scaled_filterbank = FilterbankSqrt(&state->filterbank, input_shift); - - // Apply noise reduction. - NoiseReductionApply(&state->noise_reduction, scaled_filterbank); - - if (state->pcan_gain_control.enable_pcan) { - PcanGainControlApply(&state->pcan_gain_control, scaled_filterbank); - } - - // Apply the log and scale. 
- int correction_bits = - MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2); - uint16_t* logged_filterbank = - LogScaleApply(&state->log_scale, scaled_filterbank, - state->filterbank.num_channels, correction_bits); - - output.size = state->filterbank.num_channels; - output.values = logged_filterbank; - return output; -} - -void FrontendReset(struct FrontendState* state) { - WindowReset(&state->window); - FftReset(&state->fft); - FilterbankReset(&state->filterbank); - NoiseReductionReset(&state->noise_reduction); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.h deleted file mode 100644 index 883df5fd..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_ - -#include -#include - -#include "tensorflow/lite/experimental/microfrontend/lib/fft.h" -#include "tensorflow/lite/experimental/microfrontend/lib/filterbank.h" -#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h" -#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h" -#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h" -#include "tensorflow/lite/experimental/microfrontend/lib/window.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct FrontendState { - struct WindowState window; - struct FftState fft; - struct FilterbankState filterbank; - struct NoiseReductionState noise_reduction; - struct PcanGainControlState pcan_gain_control; - struct LogScaleState log_scale; -}; - -struct FrontendOutput { - const uint16_t* values; - size_t size; -}; - -// Main entry point to processing frontend samples. Updates num_samples_read to -// contain the number of samples that have been consumed from the input array. -// Returns a struct containing the generated output. If not enough samples were -// added to generate a feature vector, the returned size will be 0 and the -// values pointer will be NULL. Note that the output pointer will be invalidated -// as soon as FrontendProcessSamples is called again, so copy the contents -// elsewhere if you need to use them later. 
-struct FrontendOutput FrontendProcessSamples(struct FrontendState* state, - const int16_t* samples, - size_t num_samples, - size_t* num_samples_read); - -void FrontendReset(struct FrontendState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c deleted file mode 100644 index 27224f6d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.c +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/frontend_util.h" - -#include -#include - -#include "tensorflow/lite/experimental/microfrontend/lib/bits.h" - -void FrontendFillConfigWithDefaults(struct FrontendConfig* config) { - WindowFillConfigWithDefaults(&config->window); - FilterbankFillConfigWithDefaults(&config->filterbank); - NoiseReductionFillConfigWithDefaults(&config->noise_reduction); - PcanGainControlFillConfigWithDefaults(&config->pcan_gain_control); - LogScaleFillConfigWithDefaults(&config->log_scale); -} - -int FrontendPopulateState(const struct FrontendConfig* config, - struct FrontendState* state, int sample_rate) { - memset(state, 0, sizeof(*state)); - - if (!WindowPopulateState(&config->window, &state->window, sample_rate)) { - fprintf(stderr, "Failed to populate window state\n"); - return 0; - } - - if (!FftPopulateState(&state->fft, state->window.size)) { - fprintf(stderr, "Failed to populate fft state\n"); - return 0; - } - FftInit(&state->fft); - - if (!FilterbankPopulateState(&config->filterbank, &state->filterbank, - sample_rate, state->fft.fft_size / 2 + 1)) { - fprintf(stderr, "Failed to populate filterbank state\n"); - return 0; - } - - if (!NoiseReductionPopulateState(&config->noise_reduction, - &state->noise_reduction, - state->filterbank.num_channels)) { - fprintf(stderr, "Failed to populate noise reduction state\n"); - return 0; - } - - int input_correction_bits = - MostSignificantBit32(state->fft.fft_size) - 1 - (kFilterbankBits / 2); - if (!PcanGainControlPopulateState( - &config->pcan_gain_control, &state->pcan_gain_control, - state->noise_reduction.estimate, state->filterbank.num_channels, - state->noise_reduction.smoothing_bits, input_correction_bits)) { - fprintf(stderr, "Failed to populate pcan gain control state\n"); - return 0; - } - - if (!LogScalePopulateState(&config->log_scale, &state->log_scale)) { - fprintf(stderr, "Failed to 
populate log scale state\n"); - return 0; - } - - FrontendReset(state); - - // All good, return a true value. - return 1; -} - -void FrontendFreeStateContents(struct FrontendState* state) { - WindowFreeStateContents(&state->window); - FftFreeStateContents(&state->fft); - FilterbankFreeStateContents(&state->filterbank); - NoiseReductionFreeStateContents(&state->noise_reduction); - PcanGainControlFreeStateContents(&state->pcan_gain_control); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h deleted file mode 100644 index 895ce6cd..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_ - -#include "tensorflow/lite/experimental/microfrontend/lib/fft_util.h" -#include "tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h" -#include "tensorflow/lite/experimental/microfrontend/lib/frontend.h" -#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h" -#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h" -#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h" -#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct FrontendConfig { - struct WindowConfig window; - struct FilterbankConfig filterbank; - struct NoiseReductionConfig noise_reduction; - struct PcanGainControlConfig pcan_gain_control; - struct LogScaleConfig log_scale; -}; - -// Fills the frontendConfig with "sane" defaults. -void FrontendFillConfigWithDefaults(struct FrontendConfig* config); - -// Allocates any buffers. -int FrontendPopulateState(const struct FrontendConfig* config, - struct FrontendState* state, int sample_rate); - -// Frees any allocated buffers. -void FrontendFreeStateContents(struct FrontendState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_FRONTEND_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h deleted file mode 100644 index 33556dab..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_ - -// This header file should be included in all variants of kiss_fft_$type.{h,cc} -// so that their sub-included source files do not mistakenly wrap libc header -// files within their kissfft_$type namespaces. -// E.g, This header avoids kissfft_int16.h containing: -// namespace kiss_fft_int16 { -// #include "kiss_fft.h" -// } -// where kiss_fft_.h contains: -// #include -// -// TRICK: By including the following header files here, their preprocessor -// header guards prevent them being re-defined inside of the kiss_fft_$type -// namespaces declared within the kiss_fft_$type.{h,cc} sources. -// Note that the original kiss_fft*.h files are untouched since they -// may be used in libraries that include them directly. 
- -#include -#include -#include -#include -#include - -#ifdef FIXED_POINT -#include -#endif - -#ifdef USE_SIMD -#include -#endif -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_COMMON_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h deleted file mode 100644 index beee99aa..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_ - -#include "tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h" - -// Wrap 16-bit kiss fft in its own namespace. Enables us to link an application -// with different kiss fft resultions (16/32 bit interger, float, double) -// without getting a linker error. 
-#define FIXED_POINT 16 -namespace kissfft_fixed16 { -#include "kiss_fft.h" -#include "tools/kiss_fftr.h" -} // namespace kissfft_fixed16 -#undef FIXED_POINT -#undef kiss_fft_scalar -#undef KISS_FFT_H - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_KISS_FFT_INT16_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.c deleted file mode 100644 index f59618e0..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h" -const uint16_t kLogLut[] -#ifndef _MSC_VER - __attribute__((aligned(4))) -#endif // _MSV_VER - = {0, 224, 442, 654, 861, 1063, 1259, 1450, 1636, 1817, 1992, 2163, - 2329, 2490, 2646, 2797, 2944, 3087, 3224, 3358, 3487, 3611, 3732, 3848, - 3960, 4068, 4172, 4272, 4368, 4460, 4549, 4633, 4714, 4791, 4864, 4934, - 5001, 5063, 5123, 5178, 5231, 5280, 5326, 5368, 5408, 5444, 5477, 5507, - 5533, 5557, 5578, 5595, 5610, 5622, 5631, 5637, 5640, 5641, 5638, 5633, - 5626, 5615, 5602, 5586, 5568, 5547, 5524, 5498, 5470, 5439, 5406, 5370, - 5332, 5291, 5249, 5203, 5156, 5106, 5054, 5000, 4944, 4885, 4825, 4762, - 4697, 4630, 4561, 4490, 4416, 4341, 4264, 4184, 4103, 4020, 3935, 3848, - 3759, 3668, 3575, 3481, 3384, 3286, 3186, 3084, 2981, 2875, 2768, 2659, - 2549, 2437, 2323, 2207, 2090, 1971, 1851, 1729, 1605, 1480, 1353, 1224, - 1094, 963, 830, 695, 559, 421, 282, 142, 0, 0}; diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.h deleted file mode 100644 index b2448a32..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_lut.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_ - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// Number of segments in the log lookup table. The table will be kLogSegments+1 -// in length (with some padding). -#define kLogSegments 128 -#define kLogSegmentsLog2 7 - -// Scale used by lookup table. -#define kLogScale 65536 -#define kLogScaleLog2 16 -#define kLogCoeff 45426 - -extern const uint16_t kLogLut[]; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_LUT_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.c deleted file mode 100644 index c27a50a6..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.c +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h" - -#include "tensorflow/lite/experimental/microfrontend/lib/bits.h" -#include "tensorflow/lite/experimental/microfrontend/lib/log_lut.h" - -#define kuint16max 0x0000FFFF - -// The following functions implement integer logarithms of various sizes. The -// approximation is calculated according to method described in -// www.inti.gob.ar/electronicaeinformatica/instrumentacion/utic/ -// publicaciones/SPL2007/Log10-spl07.pdf -// It first calculates log2 of the input and then converts it to natural -// logarithm. - -static uint32_t Log2FractionPart(const uint32_t x, const uint32_t log2x) { - // Part 1 - int32_t frac = x - (1LL << log2x); - if (log2x < kLogScaleLog2) { - frac <<= kLogScaleLog2 - log2x; - } else { - frac >>= log2x - kLogScaleLog2; - } - // Part 2 - const uint32_t base_seg = frac >> (kLogScaleLog2 - kLogSegmentsLog2); - const uint32_t seg_unit = - (((uint32_t)1) << kLogScaleLog2) >> kLogSegmentsLog2; - - const int32_t c0 = kLogLut[base_seg]; - const int32_t c1 = kLogLut[base_seg + 1]; - const int32_t seg_base = seg_unit * base_seg; - const int32_t rel_pos = ((c1 - c0) * (frac - seg_base)) >> kLogScaleLog2; - return frac + c0 + rel_pos; -} - -static uint32_t Log(const uint32_t x, const uint32_t scale_shift) { - const uint32_t integer = MostSignificantBit32(x) - 1; - const uint32_t fraction = Log2FractionPart(x, integer); - const uint32_t log2 = (integer << kLogScaleLog2) + fraction; - const uint32_t round = kLogScale / 2; - const uint32_t loge = (((uint64_t)kLogCoeff) * log2 + round) >> kLogScaleLog2; - // Finally scale to our output scale - const uint32_t loge_scaled = ((loge << scale_shift) + round) >> kLogScaleLog2; - return loge_scaled; -} - -uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal, - int signal_size, int correction_bits) { - const int scale_shift = state->scale_shift; - 
uint16_t* output = (uint16_t*)signal; - uint16_t* ret = output; - int i; - for (i = 0; i < signal_size; ++i) { - uint32_t value = *signal++; - if (state->enable_log) { - if (correction_bits < 0) { - value >>= -correction_bits; - } else { - value <<= correction_bits; - } - if (value > 1) { - value = Log(value, scale_shift); - } else { - value = 0; - } - } - *output++ = (value < kuint16max) ? value : kuint16max; - } - return ret; -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.h deleted file mode 100644 index a383f32f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct LogScaleState { - int enable_log; - int scale_shift; -}; - -// Applies a fixed point logarithm to the signal and converts it to 16 bit. Note -// that the signal array will be modified. 
-uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal, - int signal_size, int correction_bits); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c deleted file mode 100644 index 0e3dd1d1..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.c +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h" - -void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config) { - config->enable_log = 1; - config->scale_shift = 6; -} - -int LogScalePopulateState(const struct LogScaleConfig* config, - struct LogScaleState* state) { - state->enable_log = config->enable_log; - state->scale_shift = config->scale_shift; - return 1; -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h deleted file mode 100644 index 11f7d9ee..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_ - -#include -#include - -#include "tensorflow/lite/experimental/microfrontend/lib/log_scale.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct LogScaleConfig { - // set to false (0) to disable this module - int enable_log; - // scale results by 2^(scale_shift) - int scale_shift; -}; - -// Populates the LogScaleConfig with "sane" default values. -void LogScaleFillConfigWithDefaults(struct LogScaleConfig* config); - -// Allocates any buffers. -int LogScalePopulateState(const struct LogScaleConfig* config, - struct LogScaleState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_LOG_SCALE_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c deleted file mode 100644 index 16b30e66..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.c +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h" - -#include - -void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal) { - int i; - for (i = 0; i < state->num_channels; ++i) { - const uint32_t smoothing = - ((i & 1) == 0) ? state->even_smoothing : state->odd_smoothing; - const uint32_t one_minus_smoothing = (1 << kNoiseReductionBits) - smoothing; - - // Update the estimate of the noise. - const uint32_t signal_scaled_up = signal[i] << state->smoothing_bits; - uint32_t estimate = - (((uint64_t)signal_scaled_up * smoothing) + - ((uint64_t)state->estimate[i] * one_minus_smoothing)) >> - kNoiseReductionBits; - state->estimate[i] = estimate; - - // Make sure that we can't get a negative value for the signal - estimate. - if (estimate > signal_scaled_up) { - estimate = signal_scaled_up; - } - - const uint32_t floor = - ((uint64_t)signal[i] * state->min_signal_remaining) >> - kNoiseReductionBits; - const uint32_t subtracted = - (signal_scaled_up - estimate) >> state->smoothing_bits; - const uint32_t output = subtracted > floor ? subtracted : floor; - signal[i] = output; - } -} - -void NoiseReductionReset(struct NoiseReductionState* state) { - memset(state->estimate, 0, sizeof(*state->estimate) * state->num_channels); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h deleted file mode 100644 index 46d3f52e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_ - -#define kNoiseReductionBits 14 - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct NoiseReductionState { - int smoothing_bits; - uint16_t even_smoothing; - uint16_t odd_smoothing; - uint16_t min_signal_remaining; - int num_channels; - uint32_t* estimate; -}; - -// Removes stationary noise from each channel of the signal using a low pass -// filter. -void NoiseReductionApply(struct NoiseReductionState* state, uint32_t* signal); - -void NoiseReductionReset(struct NoiseReductionState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c deleted file mode 100644 index a6c9234e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.c +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h" - -#include - -void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config) { - config->smoothing_bits = 10; - config->even_smoothing = 0.025; - config->odd_smoothing = 0.06; - config->min_signal_remaining = 0.05; -} - -int NoiseReductionPopulateState(const struct NoiseReductionConfig* config, - struct NoiseReductionState* state, - int num_channels) { - state->smoothing_bits = config->smoothing_bits; - state->odd_smoothing = config->odd_smoothing * (1 << kNoiseReductionBits); - state->even_smoothing = config->even_smoothing * (1 << kNoiseReductionBits); - state->min_signal_remaining = - config->min_signal_remaining * (1 << kNoiseReductionBits); - state->num_channels = num_channels; - state->estimate = calloc(state->num_channels, sizeof(*state->estimate)); - if (state->estimate == NULL) { - fprintf(stderr, "Failed to alloc estimate buffer\n"); - return 0; - } - return 1; -} - -void NoiseReductionFreeStateContents(struct NoiseReductionState* state) { - free(state->estimate); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h deleted file mode 100644 index fa555391..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2018 The 
TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_ - -#include "tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct NoiseReductionConfig { - // scale the signal up by 2^(smoothing_bits) before reduction - int smoothing_bits; - // smoothing coefficient for even-numbered channels - float even_smoothing; - // smoothing coefficient for odd-numbered channels - float odd_smoothing; - // fraction of signal to preserve (1.0 disables this module) - float min_signal_remaining; -}; - -// Populates the NoiseReductionConfig with "sane" default values. -void NoiseReductionFillConfigWithDefaults(struct NoiseReductionConfig* config); - -// Allocates any buffers. -int NoiseReductionPopulateState(const struct NoiseReductionConfig* config, - struct NoiseReductionState* state, - int num_channels); - -// Frees any allocated buffers. 
-void NoiseReductionFreeStateContents(struct NoiseReductionState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_NOISE_REDUCTION_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c deleted file mode 100644 index 22d58767..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h" - -#include "tensorflow/lite/experimental/microfrontend/lib/bits.h" - -int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut) { - if (x <= 2) { - return lut[x]; - } - - const int16_t interval = MostSignificantBit32(x); - lut += 4 * interval - 6; - - const int16_t frac = - ((interval < 11) ? 
(x << (11 - interval)) : (x >> (interval - 11))) & - 0x3FF; - - int32_t result = ((int32_t)lut[2] * frac) >> 5; - result += (int32_t)((uint32_t)lut[1] << 5); - result *= frac; - result = (result + (1 << 14)) >> 15; - result += lut[0]; - return (int16_t)result; -} - -uint32_t PcanShrink(const uint32_t x) { - if (x < (2 << kPcanSnrBits)) { - return (x * x) >> (2 + 2 * kPcanSnrBits - kPcanOutputBits); - } else { - return (x >> (kPcanSnrBits - kPcanOutputBits)) - (1 << kPcanOutputBits); - } -} - -void PcanGainControlApply(struct PcanGainControlState* state, - uint32_t* signal) { - int i; - for (i = 0; i < state->num_channels; ++i) { - const uint32_t gain = - WideDynamicFunction(state->noise_estimate[i], state->gain_lut); - const uint32_t snr = ((uint64_t)signal[i] * gain) >> state->snr_shift; - signal[i] = PcanShrink(snr); - } -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h deleted file mode 100644 index 3f6222be..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_ - -#include -#include - -#define kPcanSnrBits 12 -#define kPcanOutputBits 6 - -#ifdef __cplusplus -extern "C" { -#endif - -// Details at https://research.google/pubs/pub45911.pdf -struct PcanGainControlState { - int enable_pcan; - uint32_t* noise_estimate; - int num_channels; - int16_t* gain_lut; - int32_t snr_shift; -}; - -int16_t WideDynamicFunction(const uint32_t x, const int16_t* lut); - -uint32_t PcanShrink(const uint32_t x); - -void PcanGainControlApply(struct PcanGainControlState* state, uint32_t* signal); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c deleted file mode 100644 index e850d439..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h" - -#include -#include - -#define kint16max 0x00007FFF - -void PcanGainControlFillConfigWithDefaults( - struct PcanGainControlConfig* config) { - config->enable_pcan = 0; - config->strength = 0.95; - config->offset = 80.0; - config->gain_bits = 21; -} - -int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config, - int32_t input_bits, uint32_t x) { - const float x_as_float = ((float)x) / ((uint32_t)1 << input_bits); - const float gain_as_float = - ((uint32_t)1 << config->gain_bits) * - powf(x_as_float + config->offset, -config->strength); - - if (gain_as_float > kint16max) { - return kint16max; - } - return (int16_t)(gain_as_float + 0.5f); -} - -int PcanGainControlPopulateState(const struct PcanGainControlConfig* config, - struct PcanGainControlState* state, - uint32_t* noise_estimate, - const int num_channels, - const uint16_t smoothing_bits, - const int32_t input_correction_bits) { - state->enable_pcan = config->enable_pcan; - if (!state->enable_pcan) { - return 1; - } - state->noise_estimate = noise_estimate; - state->num_channels = num_channels; - state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t)); - if (state->gain_lut == NULL) { - fprintf(stderr, "Failed to allocate gain LUT\n"); - return 0; - } - state->snr_shift = config->gain_bits - input_correction_bits - kPcanSnrBits; - - const int32_t input_bits = smoothing_bits - input_correction_bits; - state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0); - state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1); - state->gain_lut -= 6; - int interval; - for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) { - const uint32_t x0 = (uint32_t)1 << (interval - 1); - const uint32_t x1 = x0 + (x0 >> 1); - const uint32_t x2 = - (interval == kWideDynamicFunctionBits) ? 
x0 + (x0 - 1) : 2 * x0; - - const int16_t y0 = PcanGainLookupFunction(config, input_bits, x0); - const int16_t y1 = PcanGainLookupFunction(config, input_bits, x1); - const int16_t y2 = PcanGainLookupFunction(config, input_bits, x2); - - const int32_t diff1 = (int32_t)y1 - y0; - const int32_t diff2 = (int32_t)y2 - y0; - const int32_t a1 = 4 * diff1 - diff2; - const int32_t a2 = diff2 - a1; - - state->gain_lut[4 * interval] = y0; - state->gain_lut[4 * interval + 1] = (int16_t)a1; - state->gain_lut[4 * interval + 2] = (int16_t)a2; - } - state->gain_lut += 6; - return 1; -} - -void PcanGainControlFreeStateContents(struct PcanGainControlState* state) { - free(state->gain_lut); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h deleted file mode 100644 index d4bfaa2e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_ - -#include "tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h" - -#define kWideDynamicFunctionBits 32 -#define kWideDynamicFunctionLUTSize (4 * kWideDynamicFunctionBits - 3) - -#ifdef __cplusplus -extern "C" { -#endif - -struct PcanGainControlConfig { - // set to false (0) to disable this module - int enable_pcan; - // gain normalization exponent (0.0 disables, 1.0 full strength) - float strength; - // positive value added in the normalization denominator - float offset; - // number of fractional bits in the gain - int gain_bits; -}; - -void PcanGainControlFillConfigWithDefaults( - struct PcanGainControlConfig* config); - -int16_t PcanGainLookupFunction(const struct PcanGainControlConfig* config, - int32_t input_bits, uint32_t x); - -int PcanGainControlPopulateState(const struct PcanGainControlConfig* config, - struct PcanGainControlState* state, - uint32_t* noise_estimate, - const int num_channels, - const uint16_t smoothing_bits, - const int32_t input_correction_bits); - -void PcanGainControlFreeStateContents(struct PcanGainControlState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_PCAN_GAIN_CONTROL_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.c deleted file mode 100644 index 10da6762..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.c +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/window.h" - -#include - -int WindowProcessSamples(struct WindowState* state, const int16_t* samples, - size_t num_samples, size_t* num_samples_read) { - const int size = state->size; - - // Copy samples from the samples buffer over to our local input. - size_t max_samples_to_copy = state->size - state->input_used; - if (max_samples_to_copy > num_samples) { - max_samples_to_copy = num_samples; - } - memcpy(state->input + state->input_used, samples, - max_samples_to_copy * sizeof(*samples)); - *num_samples_read = max_samples_to_copy; - state->input_used += max_samples_to_copy; - - if (state->input_used < state->size) { - // We don't have enough samples to compute a window. - return 0; - } - - // Apply the window to the input. - const int16_t* coefficients = state->coefficients; - const int16_t* input = state->input; - int16_t* output = state->output; - int i; - int16_t max_abs_output_value = 0; - for (i = 0; i < size; ++i) { - int16_t new_value = - (((int32_t)*input++) * *coefficients++) >> kFrontendWindowBits; - *output++ = new_value; - if (new_value < 0) { - new_value = -new_value; - } - if (new_value > max_abs_output_value) { - max_abs_output_value = new_value; - } - } - // Shuffle the input down by the step size, and update how much we have used. 
- memmove(state->input, state->input + state->step, - sizeof(*state->input) * (state->size - state->step)); - state->input_used -= state->step; - state->max_abs_output_value = max_abs_output_value; - - // Indicate that the output buffer is valid for the next stage. - return 1; -} - -void WindowReset(struct WindowState* state) { - memset(state->input, 0, state->size * sizeof(*state->input)); - memset(state->output, 0, state->size * sizeof(*state->output)); - state->input_used = 0; - state->max_abs_output_value = 0; -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.h deleted file mode 100644 index bad81514..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window.h +++ /dev/null @@ -1,49 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_ - -#include -#include - -#define kFrontendWindowBits 12 - -#ifdef __cplusplus -extern "C" { -#endif - -struct WindowState { - size_t size; - int16_t* coefficients; - size_t step; - - int16_t* input; - size_t input_used; - int16_t* output; - int16_t max_abs_output_value; -}; - -// Applies a window to the samples coming in, stepping forward at the given -// rate. -int WindowProcessSamples(struct WindowState* state, const int16_t* samples, - size_t num_samples, size_t* num_samples_read); - -void WindowReset(struct WindowState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.c b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.c deleted file mode 100644 index eee6e7b5..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.c +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/microfrontend/lib/window_util.h" - -#include -#include -#include -#include - -// Some platforms don't have M_PI -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - -void WindowFillConfigWithDefaults(struct WindowConfig* config) { - config->size_ms = 25; - config->step_size_ms = 10; -} - -int WindowPopulateState(const struct WindowConfig* config, - struct WindowState* state, int sample_rate) { - state->size = config->size_ms * sample_rate / 1000; - state->step = config->step_size_ms * sample_rate / 1000; - - state->coefficients = malloc(state->size * sizeof(*state->coefficients)); - if (state->coefficients == NULL) { - fprintf(stderr, "Failed to allocate window coefficients\n"); - return 0; - } - - // Populate the window values. - const float arg = M_PI * 2.0 / ((float)state->size); - int i; - for (i = 0; i < state->size; ++i) { - float float_value = 0.5 - (0.5 * cos(arg * (i + 0.5))); - // Scale it to fixed point and round it. 
- state->coefficients[i] = - floor(float_value * (1 << kFrontendWindowBits) + 0.5); - } - - state->input_used = 0; - state->input = malloc(state->size * sizeof(*state->input)); - if (state->input == NULL) { - fprintf(stderr, "Failed to allocate window input\n"); - return 0; - } - - state->output = malloc(state->size * sizeof(*state->output)); - if (state->output == NULL) { - fprintf(stderr, "Failed to allocate window output\n"); - return 0; - } - - return 1; -} - -void WindowFreeStateContents(struct WindowState* state) { - free(state->coefficients); - free(state->input); - free(state->output); -} diff --git a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.h b/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.h deleted file mode 100644 index 68e4de9e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/experimental/microfrontend/lib/window_util.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_ - -#include "tensorflow/lite/experimental/microfrontend/lib/window.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct WindowConfig { - // length of window frame in milliseconds - size_t size_ms; - // length of step for next frame in milliseconds - size_t step_size_ms; -}; - -// Populates the WindowConfig with "sane" default values. -void WindowFillConfigWithDefaults(struct WindowConfig* config); - -// Allocates any buffers. -int WindowPopulateState(const struct WindowConfig* config, - struct WindowState* state, int sample_rate); - -// Frees any allocated buffers. -void WindowFreeStateContents(struct WindowState* state); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_LIB_WINDOW_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/common.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/common.h deleted file mode 100644 index 205294fd..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/common.h +++ /dev/null @@ -1,1180 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_ - -#include -#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#endif -#endif - -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/optimized/neon_check.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -constexpr int kReverseShift = -1; - -inline void GetActivationMinMax(FusedActivationFunctionType ac, - float* output_activation_min, - float* output_activation_max) { - switch (ac) { - case FusedActivationFunctionType::kNone: - *output_activation_min = std::numeric_limits::lowest(); - *output_activation_max = std::numeric_limits::max(); - break; - case FusedActivationFunctionType::kRelu: - *output_activation_min = 0.f; - *output_activation_max = std::numeric_limits::max(); - break; - case FusedActivationFunctionType::kRelu1: - *output_activation_min = -1.f; - *output_activation_max = 1.f; - break; - case FusedActivationFunctionType::kRelu6: - *output_activation_min = 0.f; - *output_activation_max = 6.f; - break; - } -} - -template -inline T ActivationFunctionWithMinMax(T x, T output_activation_min, - T output_activation_max) { - using std::max; - using std::min; - return min(max(x, output_activation_min), output_activation_max); -} - -// Legacy function, left for compatibility only. 
-template -float ActivationFunction(float x) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - return ActivationFunctionWithMinMax(x, output_activation_min, - output_activation_max); -} - -inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, - const float* bias_data, int array_size, - float* array_data) { - if (bias_size == 0) return; - // Note: see b/132215220: in May 2019 we thought it would be OK to replace - // this with the Eigen one-liner: - // return (array.colwise() + bias).cwiseMin(clamp_max).cwiseMin(clamp_max). - // This turned out to severely regress performance: +4ms (i.e. 8%) on - // MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now. - TFLITE_DCHECK_EQ((array_size % bias_size), 0); -#ifdef USE_NEON - float* array_ptr = array_data; - float* array_end_ptr = array_ptr + array_size; - const auto clamp_min_vec = vdupq_n_f32(clamp_min); - const auto clamp_max_vec = vdupq_n_f32(clamp_max); - for (; array_ptr != array_end_ptr; array_ptr += bias_size) { - int i = 0; - for (; i <= bias_size - 16; i += 16) { - auto b0 = vld1q_f32(bias_data + i); - auto b1 = vld1q_f32(bias_data + i + 4); - auto b2 = vld1q_f32(bias_data + i + 8); - auto b3 = vld1q_f32(bias_data + i + 12); - auto a0 = vld1q_f32(array_ptr + i); - auto a1 = vld1q_f32(array_ptr + i + 4); - auto a2 = vld1q_f32(array_ptr + i + 8); - auto a3 = vld1q_f32(array_ptr + i + 12); - auto x0 = vaddq_f32(a0, b0); - auto x1 = vaddq_f32(a1, b1); - auto x2 = vaddq_f32(a2, b2); - auto x3 = vaddq_f32(a3, b3); - x0 = vmaxq_f32(clamp_min_vec, x0); - x1 = vmaxq_f32(clamp_min_vec, x1); - x2 = vmaxq_f32(clamp_min_vec, x2); - x3 = vmaxq_f32(clamp_min_vec, x3); - x0 = vminq_f32(clamp_max_vec, x0); - x1 = vminq_f32(clamp_max_vec, x1); - x2 = vminq_f32(clamp_max_vec, x2); - x3 = vminq_f32(clamp_max_vec, x3); - vst1q_f32(array_ptr + i, x0); - vst1q_f32(array_ptr + i + 4, x1); - vst1q_f32(array_ptr + i + 8, x2); - 
vst1q_f32(array_ptr + i + 12, x3); - } - for (; i <= bias_size - 4; i += 4) { - auto b = vld1q_f32(bias_data + i); - auto a = vld1q_f32(array_ptr + i); - auto x = vaddq_f32(a, b); - x = vmaxq_f32(clamp_min_vec, x); - x = vminq_f32(clamp_max_vec, x); - vst1q_f32(array_ptr + i, x); - } - for (; i < bias_size; i++) { - array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i], - clamp_min, clamp_max); - } - } -#else // not NEON - for (int array_offset = 0; array_offset < array_size; - array_offset += bias_size) { - for (int i = 0; i < bias_size; i++) { - array_data[array_offset + i] = ActivationFunctionWithMinMax( - array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max); - } - } -#endif -} - -// Single-rounding MultiplyByQuantizedMultiplier -#if TFLITE_SINGLE_ROUNDING -inline int32_t MultiplyByQuantizedMultiplier(int32_t x, - int32_t quantized_multiplier, - int shift) { - TFLITE_DCHECK(quantized_multiplier >= 0); - TFLITE_DCHECK(shift >= -31 && shift <= 30); - - const int64_t total_shift = 31 - shift; - const int64_t round = static_cast(1) << (total_shift - 1); - int64_t result = x * static_cast(quantized_multiplier) + round; - result = result >> total_shift; - - TFLITE_DCHECK(result >= std::numeric_limits::min() && - result <= std::numeric_limits::max()); - return static_cast(result); -} - -inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp( - int32_t x, int32_t quantized_multiplier, int shift) { - TFLITE_DCHECK_LE(shift, 0); - return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); -} - -inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne( - int32_t x, int32_t quantized_multiplier, int shift) { - TFLITE_DCHECK_GE(shift, 0); - return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); -} - -inline int32_t MultiplyByQuantizedMultiplier(int64_t x, - int32_t quantized_multiplier, - int shift) { - // Inputs: - // - quantized_multiplier has fixed point at bit 31 - // - shift is -31 to +7 (negative for 
right shift) - // - // Assumptions: The following input ranges are assumed - // - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1) - // - scaling is chosen so final scaled result fits in int32_t - // - input x is in the range -(1<<47) <= x < (1<<47) - TFLITE_DCHECK(quantized_multiplier >= 0); - TFLITE_DCHECK(shift >= -31 && shift < 8); - TFLITE_DCHECK(x >= -(static_cast(1) << 47) && - x < (static_cast(1) << 47)); - - const int32_t reduced_multiplier = - (quantized_multiplier < 0x7FFF0000) - ? ((quantized_multiplier + (1 << 15)) >> 16) - : 0x7FFF; - const int64_t total_shift = 15 - shift; - const int64_t round = static_cast(1) << (total_shift - 1); - int64_t result = x * static_cast(reduced_multiplier) + round; - result = result >> total_shift; - - TFLITE_DCHECK(result >= std::numeric_limits::min() && - result <= std::numeric_limits::max()); - return static_cast(result); -} - -#ifdef USE_NEON -inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows( - int32x4x4_t input_val, int32_t quantized_multiplier, int shift) { - TFLITE_DCHECK(quantized_multiplier >= 0); - - const int right_shift = std::min(-1, shift); - const int left_shift = shift - right_shift; - - const int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier); - const int32x4_t left_shift_dup = vdupq_n_s32(left_shift); - const int32x4_t right_shift_dup = vdupq_n_s32(right_shift); - - int32x4x4_t result; - result.val[0] = vrshlq_s32( - vqdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup), - right_shift_dup); - - result.val[1] = vrshlq_s32( - vqdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup), - right_shift_dup); - - result.val[2] = vrshlq_s32( - vqdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup), - right_shift_dup); - - result.val[3] = vrshlq_s32( - vqdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup), - right_shift_dup); - - return result; -} -#endif // USE_NEON -// Double-rounding 
MultiplyByQuantizedMultiplier -#else -inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp( - int32_t x, int32_t quantized_multiplier, int left_shift) { - using gemmlowp::RoundingDivideByPOT; - using gemmlowp::SaturatingRoundingDoublingHighMul; - return RoundingDivideByPOT( - SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift); -} - -inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne( - int32_t x, int32_t quantized_multiplier, int left_shift) { - using gemmlowp::SaturatingRoundingDoublingHighMul; - return SaturatingRoundingDoublingHighMul(x * (1 << left_shift), - quantized_multiplier); -} - -inline int32_t MultiplyByQuantizedMultiplier(int32_t x, - int32_t quantized_multiplier, - int shift) { - using gemmlowp::RoundingDivideByPOT; - using gemmlowp::SaturatingRoundingDoublingHighMul; - int left_shift = shift > 0 ? shift : 0; - int right_shift = shift > 0 ? 0 : -shift; - return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul( - x * (1 << left_shift), quantized_multiplier), - right_shift); -} - -inline int32_t MultiplyByQuantizedMultiplier(int64_t x, - int32_t quantized_multiplier, - int shift) { - // Inputs: - // - quantized_multiplier has fixed point at bit 31 - // - shift is -31 to +7 (negative for right shift) - // - // Assumptions: The following input ranges are assumed - // - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1) - // - scaling is chosen so final scaled result fits in int32_t - // - input x is in the range -(1<<47) <= x < (1<<47) - assert(quantized_multiplier >= 0); - assert(shift >= -31 && shift < 8); - assert(x >= -(static_cast(1) << 47) && - x < (static_cast(1) << 47)); - - int32_t reduced_multiplier = (quantized_multiplier < 0x7FFF0000) - ? 
((quantized_multiplier + (1 << 15)) >> 16) - : 0x7FFF; - int total_shift = 15 - shift; - x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1)); - int32_t result = x >> total_shift; - return result; -} - -#ifdef USE_NEON -// Round uses ARM's rounding shift right. -inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows( - int32x4x4_t input_val, int32_t quantized_multiplier, int shift) { - const int left_shift = std::max(shift, 0); - const int right_shift = std::min(shift, 0); - int32x4x4_t result; - - int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier); - int32x4_t left_shift_dup = vdupq_n_s32(left_shift); - int32x4_t right_shift_dup = vdupq_n_s32(right_shift); - - result.val[0] = - vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), - multiplier_dup), - right_shift_dup); - - result.val[1] = - vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), - multiplier_dup), - right_shift_dup); - - result.val[2] = - vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), - multiplier_dup), - right_shift_dup); - - result.val[3] = - vrshlq_s32(vqrdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), - multiplier_dup), - right_shift_dup); - - return result; -} -#endif // USE_NEON -#endif // TFLITE_SINGLE_ROUNDING - -template -int CountLeadingZeros(T integer_input) { - static_assert(std::is_unsigned::value, - "Only unsigned integer types handled."); -#if defined(__GNUC__) - return integer_input ? 
__builtin_clz(integer_input) - : std::numeric_limits::digits; -#else - if (integer_input == 0) { - return std::numeric_limits::digits; - } - - const T one_in_leading_positive = static_cast(1) - << (std::numeric_limits::digits - 1); - int leading_zeros = 0; - while (integer_input < one_in_leading_positive) { - integer_input <<= 1; - ++leading_zeros; - } - return leading_zeros; -#endif -} - -template -inline int CountLeadingSignBits(T integer_input) { - static_assert(std::is_signed::value, "Only signed integer types handled."); -#if defined(__GNUC__) && !defined(__clang__) - return integer_input ? __builtin_clrsb(integer_input) - : std::numeric_limits::digits; -#else - using U = typename std::make_unsigned::type; - return integer_input >= 0 - ? CountLeadingZeros(static_cast(integer_input)) - 1 - : integer_input != std::numeric_limits::min() - ? CountLeadingZeros(2 * static_cast(-integer_input) - 1) - : 0; -#endif -} - -// Use "count leading zeros" helper functions to do a fast Floor(log_2(x)). -template -inline Integer FloorLog2(Integer n) { - static_assert(std::is_integral::value, ""); - static_assert(std::is_signed::value, ""); - static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, ""); - TFLITE_CHECK_GT(n, 0); - if (sizeof(Integer) == 4) { - return 30 - CountLeadingSignBits(n); - } else { - return 62 - CountLeadingSignBits(n); - } -} - -// The size of the LUT depends on the type of input. For int8 inputs a simple -// 256 entries LUT is used. For int16 inputs the high 9 bits are used for -// indexing and the 7 remaining bits are used for interpolation. We thus use a -// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry -// to interpolate the last value. -template -constexpr int lut_size() { - static_assert(std::is_same::value || - std::is_same::value, - "Only LUTs with int8 or int16 inputs are supported."); - return std::is_same::value ? 
256 : 513; -} - -// Generate a LUT for 'func' which can be used to approximate functions like -// exp, log, ... -// -// - func: the function to build the LUT for (e.g exp(x)) -// - input_min, input_max: range of the func inputs -// - output_min, output_max: range of the func outputs -// - lut: pointer to the LUT table to fill, the table must be of size -// lut_size() -template -inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max, - FloatT output_min, FloatT output_max, LutOutT* lut) { - static_assert(std::is_same::value || - std::is_same::value, - "Only LUTs with int8 or int16 inputs are supported."); - static_assert(std::is_same::value || - std::is_same::value, - "Only LUTs with int8 or int16 outputs are supported."); - static_assert(std::is_floating_point::value, - "FloatT must be a floating-point type."); - - const int nb_steps = std::is_same::value ? 256 : 512; - const FloatT step = (input_max - input_min) / nb_steps; - const FloatT half_step = step / 2; - const FloatT output_scaling_inv = - static_cast(std::numeric_limits::max() - - std::numeric_limits::min() + 1) / - (output_max - output_min); - const FloatT table_min = - static_cast(std::numeric_limits::min()); - const FloatT table_max = - static_cast(std::numeric_limits::max()); - - for (int i = 0; i < nb_steps; i++) { - const FloatT val = func(input_min + i * step); - const FloatT val_midpoint = func(input_min + i * step + half_step); - const FloatT val_next = func(input_min + (i + 1) * step); - - const FloatT sample_val = TfLiteRound(val * output_scaling_inv); - const FloatT midpoint_interp_val = - TfLiteRound((val_next * output_scaling_inv + - TfLiteRound(val * output_scaling_inv)) / - 2); - const FloatT midpoint_val = TfLiteRound(val_midpoint * output_scaling_inv); - const FloatT midpoint_err = midpoint_interp_val - midpoint_val; - const FloatT bias = TfLiteRound(midpoint_err / 2); - - lut[i] = static_cast(std::min( - std::max(sample_val - bias, table_min), table_max)); - } - 
- const bool with_extra_interpolation_value = - std::is_same::value; - if (with_extra_interpolation_value) { - lut[nb_steps] = static_cast(std::min( - std::max(TfLiteRound(func(input_max) * output_scaling_inv), - table_min), - table_max)); - } -} - -// LUT must have 513 values -template -inline LutOutT lut_lookup_with_interpolation(int16_t value, - const LutOutT* lut) { - static_assert(std::is_same::value || - std::is_same::value, - "Only LUTs with int8 or int16 outputs are supported."); - // 512 base values, lut[513] is only used to calculate the slope - const uint16_t index = static_cast(256 + (value >> 7)); - assert(index < 512 && "LUT index out of range."); - const int16_t offset = value & 0x7f; - - // Base and slope are Q0.x - const LutOutT base = lut[index]; - const LutOutT slope = lut[index + 1] - lut[index]; - - // Q0.x * Q0.7 = Q0.(x + 7) - // Round and convert from Q0.(x + 7) to Q0.x - const int delta = (slope * offset + 64) >> 7; - - // Q0.15 + Q0.15 - return static_cast(base + delta); -} - -// int16_t -> int16_t table lookup with interpolation -// LUT must have 513 values -inline int16_t lut_lookup(int16_t value, const int16_t* lut) { - return lut_lookup_with_interpolation(value, lut); -} - -// int16_t -> int8_t table lookup with interpolation -// LUT must have 513 values -inline int8_t lut_lookup(int16_t value, const int8_t* lut) { - return lut_lookup_with_interpolation(value, lut); -} - -// int8_t -> int8_t table lookup without interpolation -// LUT must have 256 values -inline int8_t lut_lookup(int8_t value, const int8_t* lut) { - return lut[128 + value]; -} - -// int8_t -> int16_t table lookup without interpolation -// LUT must have 256 values -inline int16_t lut_lookup(int8_t value, const int16_t* lut) { - return lut[128 + value]; -} - -// Table of sigmoid(i/24) at 0.16 format - 256 elements. - -// We use combined sigmoid and tanh look-up table, since -// tanh(x) = 2*sigmoid(2*x) -1. 
-// Both functions are symmetric, so the LUT table is only needed -// for the absolute value of the input. -static const uint16_t sigmoid_table_uint16[256] = { - 32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498, - 40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255, - 46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865, - 52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174, - 56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288, - 59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441, - 61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886, - 62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835, - 63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450, - 64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845, - 64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097, - 65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258, - 65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360, - 65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425, - 65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465, - 65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491, - 65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508, - 65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518, - 65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525, - 65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529, - 65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531, - 65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533, - 65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, - 65534, 65534, 
65535}; - -// TODO(b/77858996): Add these to gemmlowp. -template -IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) { - static_assert(std::is_same::value, "unimplemented"); - return a; -} - -template <> -inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) { - std::int64_t a64 = a; - std::int64_t b64 = b; - std::int64_t sum = a64 + b64; - return static_cast(std::min( - static_cast(std::numeric_limits::max()), - std::max( - static_cast(std::numeric_limits::min()), - sum))); -} - -template -gemmlowp::FixedPoint SaturatingAddNonGemmlowp( - gemmlowp::FixedPoint a, - gemmlowp::FixedPoint b) { - return gemmlowp::FixedPoint::FromRaw( - SaturatingAddNonGemmlowp(a.raw(), b.raw())); -} - -template -IntegerType SaturatingSub(IntegerType a, IntegerType b) { - static_assert(std::is_same::value, "unimplemented"); - return a; -} - -template <> -inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) { - std::int32_t a32 = a; - std::int32_t b32 = b; - std::int32_t diff = a32 - b32; - return static_cast( - std::min(static_cast(32767), - std::max(static_cast(-32768), diff))); -} - -template <> -inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) { - std::int64_t a64 = a; - std::int64_t b64 = b; - std::int64_t diff = a64 - b64; - return static_cast(std::min( - static_cast(std::numeric_limits::max()), - std::max( - static_cast(std::numeric_limits::min()), - diff))); -} - -template -gemmlowp::FixedPoint SaturatingSub( - gemmlowp::FixedPoint a, - gemmlowp::FixedPoint b) { - return gemmlowp::FixedPoint::FromRaw( - SaturatingSub(a.raw(), b.raw())); -} -// End section to be moved to gemmlowp. 
- -template -IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) { - if (exponent == 0) { - return x; - } - using ScalarIntegerType = - typename gemmlowp::FixedPointRawTypeTraits::ScalarRawType; - const IntegerType min = - gemmlowp::Dup(std::numeric_limits::min()); - const IntegerType max = - gemmlowp::Dup(std::numeric_limits::max()); - const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType); - - const std::int32_t threshold = - ((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1); - const IntegerType positive_mask = - gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup(threshold)); - const IntegerType negative_mask = - gemmlowp::MaskIfLessThan(x, gemmlowp::Dup(-threshold)); - - IntegerType result = gemmlowp::ShiftLeft(x, exponent); - result = gemmlowp::SelectUsingMask(positive_mask, max, result); - result = gemmlowp::SelectUsingMask(negative_mask, min, result); - return result; -} - -// If we want to leave IntegerBits fixed, then multiplication -// by a power of two has to be saturating/rounding, not exact anymore. -template -gemmlowp::FixedPoint -SaturatingRoundingMultiplyByPOTParam( - gemmlowp::FixedPoint a, int exponent) { - return gemmlowp::FixedPoint::FromRaw( - SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent)); -} - -// Convert int32_t multiplier to int16_t with rounding. 
-inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t, - int16_t* multiplier_int16_t) { - TFLITE_DCHECK_GE(multiplier_int32_t, 0); - static constexpr int32_t kRoundingOffset = 1 << 15; - if (multiplier_int32_t >= - std::numeric_limits::max() - kRoundingOffset) { - *multiplier_int16_t = std::numeric_limits::max(); - return; - } - const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16; - TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset); - TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset); - *multiplier_int16_t = result; - TFLITE_DCHECK_EQ(*multiplier_int16_t, result); -} - -// Minimum output bits to accommodate log of maximum input range. It actually -// does not matter if one considers, say, [-64,64] or [-64,64). -// -// For example, run this through Octave: -// [0:127; ... -// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ... -// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))] -constexpr int min_log_x_output_bits(int input_bits) { - return input_bits > 90 ? 7 - : input_bits > 44 ? 6 - : input_bits > 21 ? 5 - : input_bits > 10 ? 4 - : input_bits > 4 ? 3 - : input_bits > 1 ? 2 - : 1; -} - -// Although currently the name of this function says that it cannot handle -// values less than 1, in practice it can handle as low as 1/x_max, where -// x_max is the largest representable input. In other words, the output range -// is symmetric. -template -inline gemmlowp::FixedPoint -log_x_for_x_greater_than_or_equal_to_1_impl( - gemmlowp::FixedPoint input_val) { - // assert(__builtin_clz(0u) >= std::numeric_limits::digits - 1); - // assert(__builtin_clz(0u) <= std::numeric_limits::digits); - using FixedPoint0 = gemmlowp::FixedPoint; - // The reason for accumulating the result with an extra bit of headroom is - // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled * - // recip_denom will otherwise introduce an error. 
- static constexpr int kAccumIntegerBits = OutputIntegerBits + 1; - using FixedPointAccum = gemmlowp::FixedPoint; - - const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( - FixedPoint0, 1488522236, std::log(2.0)); - const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( - FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5))); - const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( - FixedPoint0, 1518500250, std::sqrt(0.5)); - const FixedPoint0 one_quarter = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0); - - const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( - FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0))); - const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( - FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0))); - const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( - FixedPoint0, 1057819769, - 2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0))); - const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( - FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0))); - - const FixedPointAccum shifted_quarter = - gemmlowp::Rescale(one_quarter); - - // Reinterpret the input value as Q0.31, because we will figure out the - // required shift "ourselves" instead of using, say, Rescale. 
- FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw()); - // z_a_pow_2 = input_integer_bits - z_a_headroom; - int z_a_headroom_plus_1 = CountLeadingZeros(static_cast(z_a.raw())); - FixedPoint0 r_a_tmp = - SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1)); - const int32_t r_a_raw = - SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1); - // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25); - // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25, - // InputIntegerBits - z_b_headroom - 0.25); - const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp( - FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( - static_cast(InputIntegerBits - z_a_headroom_plus_1), - 31 - kAccumIntegerBits)), - shifted_quarter); - - // z_b is treated like z_a, but premultiplying by sqrt(0.5). - FixedPoint0 z_b = z_a * sqrt_half; - int z_b_headroom = CountLeadingZeros(static_cast(z_b.raw())) - 1; - const int32_t r_b_raw = - SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom); - const FixedPointAccum z_b_pow_2_adj = SaturatingSub( - FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( - static_cast(InputIntegerBits - z_b_headroom), - 31 - kAccumIntegerBits)), - shifted_quarter); - - const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw)); - const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw( - std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw())); - - const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half); - FixedPoint0 q = r - sqrt_sqrt_half; - q = q + q; - - const FixedPoint0 common_sq = q * q; - const FixedPoint0 num = q * r + q * common_sq * alpha_n; - const FixedPoint0 denom_minus_one_0 = - p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q; - const FixedPoint0 recip_denom = - one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0); - - const FixedPointAccum num_scaled = gemmlowp::Rescale(num); - return gemmlowp::Rescale(z_pow_2_adj * log_2 + - num_scaled * 
recip_denom); -} - -template -inline gemmlowp::FixedPoint -log_x_for_x_greater_than_or_equal_to_1( - gemmlowp::FixedPoint input_val) { - static_assert( - OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits), - "Output integer bits must be sufficient to accommodate logs of inputs."); - return log_x_for_x_greater_than_or_equal_to_1_impl( - input_val); -} - -inline int32_t GetReciprocal(int32_t x, int x_integer_digits, - int* num_bits_over_unit) { - int headroom_plus_one = CountLeadingZeros(static_cast(x)); - // This is the number of bits to the left of the binary point above 1.0. - // Consider x=1.25. In that case shifted_scale=0.8 and - // no later adjustment will be needed. - *num_bits_over_unit = x_integer_digits - headroom_plus_one; - const int32_t shifted_sum_minus_one = - static_cast((static_cast(x) << headroom_plus_one) - - (static_cast(1) << 31)); - - gemmlowp::FixedPoint shifted_scale = - gemmlowp::one_over_one_plus_x_for_x_in_0_1( - gemmlowp::FixedPoint::FromRaw(shifted_sum_minus_one)); - return shifted_scale.raw(); -} - -inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift, - int32_t* output_inv_sqrt, - int* output_shift) { - TFLITE_DCHECK_GE(input, 0); - if (input <= 1) { - // Handle the input value 1 separately to avoid overflow in that case - // in the general computation below (b/143972021). Also handle 0 as if it - // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid - // but rare/unrealistic input value. We can expect both to occur in some - // incompletely trained models, but probably not in fully trained models. 
- *output_inv_sqrt = std::numeric_limits::max(); - *output_shift = 0; - return; - } - TFLITE_DCHECK_GT(input, 1); - *output_shift = 11; - while (input >= (1 << 29)) { - input /= 4; - ++*output_shift; - } - const unsigned max_left_shift_bits = - CountLeadingZeros(static_cast(input)) - 1; - const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2; - const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1; - *output_shift -= left_shift_bit_pairs; - input <<= 2 * left_shift_bit_pairs; - TFLITE_DCHECK_GE(input, (1 << 27)); - TFLITE_DCHECK_LT(input, (1 << 29)); - using gemmlowp::FixedPoint; - using gemmlowp::Rescale; - using gemmlowp::SaturatingRoundingMultiplyByPOT; - // Using 3 integer bits gives us enough room for the internal arithmetic in - // this Newton-Raphson iteration. - using F3 = FixedPoint; - using F0 = FixedPoint; - const F3 fixedpoint_input = F3::FromRaw(input >> 1); - const F3 fixedpoint_half_input = - SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input); - const F3 fixedpoint_half_three = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5); - // Newton-Raphson iteration - // Naive unoptimized starting guess: x = 1 - F3 x = F3::One(); - // Naive unoptimized number of iterations: 5 - for (int i = 0; i < 5; i++) { - const F3 x3 = Rescale<3>(x * x * x); - x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3); - } - const F0 fixedpoint_half_sqrt_2 = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.); - x = x * fixedpoint_half_sqrt_2; - *output_inv_sqrt = x.raw(); - if (*output_shift < 0) { - *output_inv_sqrt <<= -*output_shift; - *output_shift = 0; - } - // Convert right shift (right is positive) to left shift. - *output_shift *= reverse_shift; -} - -// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING -// BROADCASTING. -// -// NdArrayDesc describes the shape and memory layout of an N-dimensional -// rectangular array of numbers. 
-// -// NdArrayDesc is basically identical to Dims defined in types.h. -// However, as Dims is to be deprecated, this class exists as an adaptor -// to enable simple unoptimized implementations of element-wise broadcasting -// operations. -template -struct NdArrayDesc { - // The "extent" of each dimension. Indices along dimension d must be in the - // half-open interval [0, extents[d]). - int extents[N]; - - // The number of *elements* (not bytes) between consecutive indices of each - // dimension. - int strides[N]; -}; - -// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING -// BROADCASTING. -// -// Same as Offset(), except takes as NdArrayDesc instead of Dims. -inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2, - int i3) { - TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]); - TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]); - TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]); - TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]); - return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + - i3 * desc.strides[3]; -} - -inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) { - return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] + - indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + - indexes[4] * desc.strides[4]; -} - -inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) { - return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] + - indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + - indexes[4] * desc.strides[4] + indexes[5] * desc.strides[5] + - indexes[6] * desc.strides[6] + indexes[7] * desc.strides[7]; -} - -// Given the dimensions of the operands for an element-wise binary broadcast, -// adjusts them so that they can be directly iterated over with simple loops. -// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and -// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr. 
-// -// This function assumes that the two input shapes are compatible up to -// broadcasting and the shorter one has already been prepended with 1s to be the -// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64), -// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that -// Dims refer to shapes in reverse order. In this case, input0_dims will be -// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1). -// -// When two shapes are compatible up to broadcasting, for each dimension d, -// the input extents are either equal, or one of them is 1. -// -// This function performs the following for each dimension d: -// - If the extents are equal, then do nothing since the loop that walks over -// both of the input arrays is correct. -// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1 -// and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows -// array0 to be referenced *at any index* in dimension d and still access the -// same slice. -template -inline void NdArrayDescsForElementwiseBroadcast(const Dims& input0_dims, - const Dims& input1_dims, - NdArrayDesc* desc0_out, - NdArrayDesc* desc1_out) { - TFLITE_DCHECK(desc0_out != nullptr); - TFLITE_DCHECK(desc1_out != nullptr); - - // Copy dims to desc. - for (int i = 0; i < N; ++i) { - desc0_out->extents[i] = input0_dims.sizes[i]; - desc0_out->strides[i] = input0_dims.strides[i]; - desc1_out->extents[i] = input1_dims.sizes[i]; - desc1_out->strides[i] = input1_dims.strides[i]; - } - - // Walk over each dimension. If the extents are equal do nothing. - // Otherwise, set the desc with extent 1 to have extent equal to the other and - // stride 0. 
- for (int i = 0; i < N; ++i) { - const int extent0 = ArraySize(input0_dims, i); - const int extent1 = ArraySize(input1_dims, i); - if (extent0 != extent1) { - if (extent0 == 1) { - desc0_out->strides[i] = 0; - desc0_out->extents[i] = extent1; - } else { - TFLITE_DCHECK_EQ(extent1, 1); - desc1_out->strides[i] = 0; - desc1_out->extents[i] = extent0; - } - } - } -} - -// Copies dims to desc, calculating strides. -template -inline void CopyDimsToDesc(const RuntimeShape& input_shape, - NdArrayDesc* desc_out) { - int desc_stride = 1; - for (int i = N - 1; i >= 0; --i) { - desc_out->extents[i] = input_shape.Dims(i); - desc_out->strides[i] = desc_stride; - desc_stride *= input_shape.Dims(i); - } -} - -template -inline void NdArrayDescsForElementwiseBroadcast( - const RuntimeShape& input0_shape, const RuntimeShape& input1_shape, - NdArrayDesc* desc0_out, NdArrayDesc* desc1_out) { - TFLITE_DCHECK(desc0_out != nullptr); - TFLITE_DCHECK(desc1_out != nullptr); - - auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape); - auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape); - - // Copy dims to desc, calculating strides. - CopyDimsToDesc(extended_input0_shape, desc0_out); - CopyDimsToDesc(extended_input1_shape, desc1_out); - - // Walk over each dimension. If the extents are equal do nothing. - // Otherwise, set the desc with extent 1 to have extent equal to the other and - // stride 0. 
- for (int i = 0; i < N; ++i) { - const int extent0 = extended_input0_shape.Dims(i); - const int extent1 = extended_input1_shape.Dims(i); - if (extent0 != extent1) { - if (extent0 == 1) { - desc0_out->strides[i] = 0; - desc0_out->extents[i] = extent1; - } else { - TFLITE_DCHECK_EQ(extent1, 1); - desc1_out->strides[i] = 0; - desc1_out->extents[i] = extent0; - } - } - } -} - -template -inline void NdArrayDescsForElementwiseBroadcast( - const RuntimeShape& input0_shape, const RuntimeShape& input1_shape, - const RuntimeShape& input2_shape, NdArrayDesc* desc0_out, - NdArrayDesc* desc1_out, NdArrayDesc* desc2_out) { - TFLITE_DCHECK(desc0_out != nullptr); - TFLITE_DCHECK(desc1_out != nullptr); - TFLITE_DCHECK(desc2_out != nullptr); - - auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape); - auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape); - auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape); - - // Copy dims to desc, calculating strides. - CopyDimsToDesc(extended_input0_shape, desc0_out); - CopyDimsToDesc(extended_input1_shape, desc1_out); - CopyDimsToDesc(extended_input2_shape, desc2_out); - - // Walk over each dimension. If the extents are equal do nothing. - // Otherwise, set the desc with extent 1 to have extent equal to the other and - // stride 0. 
- for (int i = 0; i < N; ++i) { - const int extent0 = extended_input0_shape.Dims(i); - const int extent1 = extended_input1_shape.Dims(i); - const int extent2 = extended_input2_shape.Dims(i); - - int extent = extent0; - if (extent1 != 1) extent = extent1; - if (extent2 != 1) extent = extent2; - - TFLITE_DCHECK(extent0 == 1 || extent0 == extent); - TFLITE_DCHECK(extent1 == 1 || extent1 == extent); - TFLITE_DCHECK(extent2 == 1 || extent2 == extent); - - if (!(extent0 == extent1 && extent1 == extent2)) { - if (extent0 == 1) { - desc0_out->strides[i] = 0; - desc0_out->extents[i] = extent; - } - if (extent1 == 1) { - desc1_out->strides[i] = 0; - desc1_out->extents[i] = extent; - } - if (extent2 == 1) { - desc2_out->strides[i] = 0; - desc2_out->extents[i] = extent; - } - } - } -} - -// Detailed implementation of NDOpsHelper, the indexes must be a zero array. -// This implementation is equivalent to N nested loops. Ex, if N=4, it can be -// re-writen as: -// for (int b = 0; b < output.extents[0]; ++b) { -// for (int y = 0; y < output.extents[1]; ++y) { -// for (int x = 0; x < output.extents[2]; ++x) { -// for (int c = 0; c < output.extents[3]; ++c) { -// calc({b,y,x,c}); -// } -// } -// } -// } -template -typename std::enable_if::type NDOpsHelperImpl( - const NdArrayDesc& output, const Calc& calc, int indexes[N]) { - for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) { - NDOpsHelperImpl(output, calc, indexes); - } -} - -template -typename std::enable_if::type NDOpsHelperImpl( - const NdArrayDesc& output, const Calc& calc, int indexes[N]) { - for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) { - calc(indexes); - } -} - -// Execute the calc function in the innermost iteration based on the shape of -// the output. The calc function should take a single argument of type int[N]. 
-template -inline void NDOpsHelper(const NdArrayDesc& output, const Calc& calc) { - int indexes[N] = {0}; - NDOpsHelperImpl(output, calc, indexes); -} -// Copied from gemmlowp::RoundDown when we dropped direct dependency on -// gemmlowp. -// -// Returns the runtime argument rounded down to the nearest multiple of -// the fixed Modulus. -template -Integer RoundDown(Integer i) { - return i - (i % Modulus); -} - -// Copied from gemmlowp::RoundUp when we dropped direct dependency on -// gemmlowp. -// -// Returns the runtime argument rounded up to the nearest multiple of -// the fixed Modulus. -template -Integer RoundUp(Integer i) { - return RoundDown(i + Modulus - 1); -} - -// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on -// gemmlowp. -// -// Returns the quotient a / b rounded up ('ceil') to the nearest integer. -template -Integer CeilQuotient(Integer a, Integer b) { - return (a + b - 1) / b; -} - -// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped -// the direct dependency of internal/optimized/ on gemmlowp. -// -// It computes a reasonable number of threads to use for a GEMM of shape -// (rows, cols, depth). -// -// TODO(b/131910176): get rid of this function by switching each call site -// to its own more sensible logic for its own workload. -template -inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols, - int depth) { - // Early-exit in the default case where multi-threading is disabled. - if (max_num_threads == 1) { - return 1; - } - - // Ensure that each thread has KernelRows rows to process, if at all possible. - int thread_count = std::min(max_num_threads, rows / KernelRows); - - // Limit the number of threads according to the overall size of the problem. - if (thread_count > 1) { - // Empirically determined value. 
- static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024; - - // We can only multiply two out of three sizes without risking overflow - const std::uint64_t cubic_size = - std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth); - - thread_count = std::min( - thread_count, static_cast(cubic_size / min_cubic_size_per_thread)); - } - - if (thread_count < 1) { - thread_count = 1; - } - - assert(thread_count > 0 && thread_count <= max_num_threads); - return thread_count; -} - -template -void optimized_ops_preload_l1_stream(const T* ptr) { -#ifdef __GNUC__ - // builtin offered by GCC-compatible compilers including clang - __builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0); -#else - (void)ptr; -#endif -} - -template -void optimized_ops_preload_l1_keep(const T* ptr) { -#ifdef __GNUC__ - // builtin offered by GCC-compatible compilers including clang - __builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3); -#else - (void)ptr; -#endif -} - -template -void optimized_ops_prefetch_write_l1_keep(const T* ptr) { -#ifdef __GNUC__ - // builtin offered by GCC-compatible compilers including clang - __builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3); -#else - (void)ptr; -#endif -} - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/compatibility.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/compatibility.h deleted file mode 100644 index 7ba66ed8..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/compatibility.h +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ - -#include - -#include "tensorflow/lite/kernels/op_macros.h" - -#ifndef TFLITE_DCHECK -#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE -#endif - -#ifndef TFLITE_DCHECK_EQ -#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE -#endif - -#ifndef TFLITE_DCHECK_NE -#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE -#endif - -#ifndef TFLITE_DCHECK_GE -#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE -#endif - -#ifndef TFLITE_DCHECK_GT -#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE -#endif - -#ifndef TFLITE_DCHECK_LE -#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE -#endif - -#ifndef TFLITE_DCHECK_LT -#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE -#endif - -// TODO(ahentz): Clean up: We should stick to the DCHECK versions. -#ifndef TFLITE_CHECK -#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT -#endif - -#ifndef TFLITE_CHECK_EQ -#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT -#endif - -#ifndef TFLITE_CHECK_NE -#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT -#endif - -#ifndef TFLITE_CHECK_GE -#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? 
(void)0 : TFLITE_ABORT -#endif - -#ifndef TFLITE_CHECK_GT -#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT -#endif - -#ifndef TFLITE_CHECK_LE -#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT -#endif - -#ifndef TFLITE_CHECK_LT -#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT -#endif - -#ifndef TF_LITE_STATIC_MEMORY -// TODO(b/162019032): Consider removing these type-aliases. -using int8 = std::int8_t; -using uint8 = std::uint8_t; -using int16 = std::int16_t; -using uint16 = std::uint16_t; -using int32 = std::int32_t; -using uint32 = std::uint32_t; -#endif // !defined(TF_LITE_STATIC_MEMORY) - -// Allow for cross-compiler usage of function signatures - currently used for -// specifying named RUY profiler regions in templated methods. -#if defined(_MSC_VER) -#define TFLITE_PRETTY_FUNCTION __FUNCSIG__ -#elif defined(__GNUC__) -#define TFLITE_PRETTY_FUNCTION __PRETTY_FUNCTION__ -#else -#define TFLITE_PRETTY_FUNCTION __func__ -#endif - -// TFLITE_DEPRECATED() -// -// Duplicated from absl/base/macros.h to avoid pulling in that library. -// Marks a deprecated class, struct, enum, function, method and variable -// declarations. The macro argument is used as a custom diagnostic message (e.g. -// suggestion of a better alternative). -// -// Example: -// -// class TFLITE_DEPRECATED("Use Bar instead") Foo {...}; -// TFLITE_DEPRECATED("Use Baz instead") void Bar() {...} -// -// Every usage of a deprecated entity will trigger a warning when compiled with -// clang's `-Wdeprecated-declarations` option. This option is turned off by -// default, but the warnings will be reported by clang-tidy. 
-#if defined(__clang__) && __cplusplus >= 201103L -#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message))) -#endif - -#ifndef TFLITE_DEPRECATED -#define TFLITE_DEPRECATED(message) -#endif - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/cppmath.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/cppmath.h deleted file mode 100644 index c97cc31d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/cppmath.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_ - -#include - -namespace tflite { - -#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \ - (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__) -#define TF_LITE_GLOBAL_STD_PREFIX -#else -#define TF_LITE_GLOBAL_STD_PREFIX std -#endif - -#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \ - template \ - inline T tf_name(const T x) { \ - return TF_LITE_GLOBAL_STD_PREFIX::std_name(x); \ - } - -DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round); -DECLARE_STD_GLOBAL_SWITCH1(TfLiteExpm1, expm1); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/max.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/max.h deleted file mode 100644 index c1810027..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/max.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ - -#include - -namespace tflite { - -#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__) -inline float TfLiteMax(const float& x, const float& y) { - return std::max(x, y); -} -#else -template -inline T TfLiteMax(const T& x, const T& y) { - return std::fmax(x, y); -} -#endif - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/min.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/min.h deleted file mode 100644 index 62035dcc..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/min.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ - -#include - -namespace tflite { - -#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__) -inline float TfLiteMin(const float& x, const float& y) { - return std::min(x, y); -} -#else -template -inline T TfLiteMin(const T& x, const T& y) { - return std::fmin(x, y); -} -#endif - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/optimized/neon_check.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/optimized/neon_check.h deleted file mode 100644 index 7df1129d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/optimized/neon_check.h +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_ - -// TFLM does not need to utilize any Neon optimizations. 
- -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor.h deleted file mode 100644 index 4d71c967..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor.h +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_ - -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -inline RuntimeShape GetTensorShape(std::vector data) { - return RuntimeShape(data.size(), data.data()); -} - -// A list of tensors in a format that can be used by kernels like split and -// concatenation. -template -class VectorOfTensors { - public: - // Build with the tensors in 'tensor_list'. 
- VectorOfTensors(const TfLiteContext& context, - const TfLiteIntArray& tensor_list) { - int num_tensors = tensor_list.size; - - all_data_.reserve(num_tensors); - all_shape_.reserve(num_tensors); - all_shape_ptr_.reserve(num_tensors); - - for (int i = 0; i < num_tensors; ++i) { - TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; - all_data_.push_back(GetTensorData(t)); - all_shape_.push_back(GetTensorShape(t)); - } - - // Taking the pointer from inside a std::vector is only OK if the vector is - // never modified, so we populate all_shape in the previous loop and then we - // are free to grab iterators here. - for (int i = 0; i < num_tensors; ++i) { - all_shape_ptr_.push_back(&all_shape_[i]); - } - } - // Return a pointer to the data pointers of all tensors in the list. For - // example: - // float* const* f = v.data(); - // f[0][1] is the second element of the first tensor. - T* const* data() const { return all_data_.data(); } - - // Return a pointer the shape pointers of all tensors in the list. For - // example: - // const RuntimeShape* const* d = v.dims(); - // dims[1] are the dimensions of the second tensor in the list. - const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); } - - private: - std::vector all_data_; - std::vector all_shape_; - std::vector all_shape_ptr_; -}; - -// A list of quantized tensors in a format that can be used by kernels like -// split and concatenation. -class VectorOfQuantizedTensors : public VectorOfTensors { - public: - // Build with the tensors in 'tensor_list'. 
- VectorOfQuantizedTensors(const TfLiteContext& context, - const TfLiteIntArray& tensor_list) - : VectorOfTensors(context, tensor_list) { - for (int i = 0; i < tensor_list.size; ++i) { - TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; - zero_point_.push_back(t->params.zero_point); - scale_.push_back(t->params.scale); - } - } - - const float* scale() const { return scale_.data(); } - const int32_t* zero_point() const { return zero_point_.data(); } - - private: - std::vector zero_point_; - std::vector scale_; -}; - -// Writes randomly accessed values from `input` sequentially into `output`. -template -class SequentialTensorWriter { - public: - SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) { - input_data_ = GetTensorData(input); - output_ptr_ = GetTensorData(output); - } - SequentialTensorWriter(const T* input_data, T* output_data) - : input_data_(input_data), output_ptr_(output_data) {} - - void Write(int position) { *output_ptr_++ = input_data_[position]; } - void WriteN(int position, int len) { - memcpy(output_ptr_, &input_data_[position], sizeof(T) * len); - output_ptr_ += len; - } - - private: - const T* input_data_; - T* output_ptr_; -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor_utils.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor_utils.h deleted file mode 100644 index 122a0dc2..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/portable_tensor_utils.h +++ /dev/null @@ -1,484 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_ - -#include -#include -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -#if defined(_MSC_VER) -#define __restrict__ __restrict -#endif - -namespace tflite { - -namespace tensor_utils { - -// Multiplies a matrix with a scalar and reduce the result on each row to a -// scalar. -// Parameters: -// - matrix: matrix of size n_row * n_col -// - scalar: the scalar that is multiplied to each element in the matrix -// - n_row: the row count of the matrix -// - n_col: the column count of the matrix -// - output: the 32bit output -// Note: We do not need saturation because the int8 * int8 is safe from overflow -// in (2^31-1) / (2^14) = 131072, which is bigger than the n_row. Non-zero -// initial output value is not exceptionally large. -void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar, - int32_t n_row, int32_t n_col, - int32_t* output); - -// Add another vector for each batch in the batch vector. -template -void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch, - T* batch_vector) { - for (int b = 0; b < n_batch; b++) { - for (int i = 0; i < v_size; ++i) { - batch_vector[i] += vector[i]; - } - batch_vector += v_size; - } -} - -// Cwise product of two vectors. 
-template -inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2, - int v_size, T* result) { - for (int v = 0; v < v_size; v++) { - *result++ = *vector1++ * *vector2++; - } -} - -// Cwise product of a vector and a batch-vector. -template -inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size, - const T* batch_vector, int n_batch, - T* result) { - for (int b = 0; b < n_batch; b++) { - VectorVectorCwiseProduct(vector, batch_vector, v_size, result); - // Update the pointers. - result += v_size; - batch_vector += v_size; - } -} - -// Cwise product and accumulate of two vectors. Since it's a MAC operation, the -// assumption here is that result array is initialized to valid values. -template -inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1, - const T* __restrict__ vector2, - int v_size, - T* __restrict__ result) { - for (int v = 0; v < v_size; v++) { - *result++ += *vector1++ * *vector2++; - } -} - -// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC -// operation, the assumption here is that result array is initialized to valid -// values. -template -inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size, - const T* batch_vector, - int n_batch, T* result) { - for (int b = 0; b < n_batch; b++) { - VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result); - // Update the pointers. - result += v_size; - batch_vector += v_size; - } -} - -// Batch vector initialization with another vector. -template -void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch, - T* batch_vector) { - for (int b = 0; b < n_batch; b++) { - std::copy_n(vector, v_size, batch_vector + b * v_size); - } -} - -// Checks if all entries of vector are zero for float. -bool IsZeroVector(const float* vector, int v_size); - -// Checks if all entries of vector are zero for int8. 
-bool IsZeroVector(const int8_t* vector, int v_size); - -// Quantizes a buffer of floating point values using a symmetric quantization -// (i.e. linear quantization without an offset) to 8-bit signed integers. -// It also outputs the range (min, max) of the floating point buffer, and the -// scaling factor used to quantize the values. -void SymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* min_value, - float* max_value, float* scaling_factor); - -// Quantizes a buffer of floating point values using a symmetric quantization -// (i.e. linear quantization without an offset) to 8-bit signed integers. -// It uses the range (min, max) provided to the function to calculate the -// appropriate scaling factor to quantize the values. -void SymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float min_value, - float max_value, float* scaling_factor); - -void AsymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* scaling_factor, - int32_t* offset); - -// Helper function to quantize floats. -// float_data_ptr input float vectors -// n_batch number of input vectors -// n_data size of a single input vector -// quantized_data_ptr (out) vector with quantized data -// scaling_factors (out) scaling factors (one per vector) -// zero_points (out) zero points (one per vector) -// do_asymmetric controls if the quantization should be asymmetric. 
-inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch, - int n_data, int8_t* quantized_data_ptr, - float* scaling_factors, int32_t* zero_points, - bool do_asymmetric) { - for (int b = 0; b < n_batch; ++b) { - const int offset = b * n_data; - if (do_asymmetric) { - tensor_utils::AsymmetricQuantizeFloats( - float_data_ptr + offset, n_data, quantized_data_ptr + offset, - &scaling_factors[b], &zero_points[b]); - } else { - float unused_min, unused_max; - tensor_utils::SymmetricQuantizeFloats( - float_data_ptr + offset, n_data, quantized_data_ptr + offset, - &unused_min, &unused_max, &scaling_factors[b]); - } - } -} - -// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch -// dimension composed by input vectors independent from each other). The result -// of the multiplication is accumulated to the passed result buffer. -// More specifically, for a matrix M of shape [n, i] and a batched-vector -// of shape [i, batch] it will first compute the product of shape [n, batch]. -// This product will be accumulated to the result buffer. -void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, - int m_cols, const float* vector, - int n_batch, float* result); - -// Same as the function above, but the matrix is a sparse tensor with block -// pattern 1x4. -// This function assumes that m_cols is a multiple of the block size (4 in this -// case) so that there's no incomplete block. -void SparseMatrixBatchVectorMultiplyAccumulate1x4( - const float* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const float* __restrict__ vector, int n_batch, float* __restrict__ result); - -// Same as the function above, but the matrix is stored in block compressed -// sparse row format with block pattern 1x16 which consists of two arrays: -// 1. A matrix array stores non-zero blocks of the matrix in row major. -// 2. 
A ledger array stores nrows groups, one group per row. Each group starts -// with an integer representing the number of non-zero blocks for the -// corresponding row and follows with column indexes of the first element -// of each non-zero block. -// This function assumes that -// 1. m_cols is a multiple of 16 so that all blocks are full blocks. -// 2. m_cols < 254 * 16 so that block index can be represented by uint8. -void SparseMatrixBatchVectorMultiplyAccumulate( - const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, - int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, - float* __restrict__ result); - -// Same as the function above, but for values quantized using symmetric -// quantization (e.g. by calling SymmetricQuantizeFloats). -// The passed scaling factors is a buffer of the quantization scaling factors -// that will be used to dequentize the products into the final result buffer. -// These scaling factors are the multiplication of the matrix scaling factor -// by the vector's scaling factor, one per batch (i.e. this allows quantizing -// each batch in the batch-vector matrix independently). -void MatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, - const float* __restrict__ scaling_factors, int n_batch, - float* __restrict__ result); - -// Same as the function above except that vector values -// are quantized with asymmetric quantization per-batch and the matrix -// is quantized per row. -void MatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, - const float* __restrict__ scaling_factors, int n_batch, - float* __restrict__ result, const float* __restrict__ per_channel_scale, - const int32_t* __restrict__ input_offset); - -// Same as the function above, but the matrix is a sparse tensor with block -// pattern 1x16. 
-// This function assumes that m_cols is a multiple of the block size (16 in this -// case) so that there's no incomplete block. Also, it assumes all offsets of -// input, output and filter are zero. -void SparseMatrixBatchVectorMultiplyAccumulate1x16( - const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, - int n_batch, const int32_t input_offset, const int32_t output_multiplier, - const int32_t output_shift, const int32_t output_offset, - const int32_t output_activation_min, const int32_t output_activation_max, - int8_t* __restrict__ result); - -// Same as the function above, but the matrix is stored in block compressed -// sparse row format with block pattern 1x16 which consists of two arrays: -// 1. A matrix array stores non-zero blocks of the matrix in row major. -// 2. A ledger array stores nrows groups, one group per row. Each group starts -// with an integer representing the number of non-zero blocks for the -// corresponding row followed by column index of the first element of -// each non-zero block. -// This function assumes that -// 1. m_cols is a multiple of 16 so that all blocks are full blocks. -// 2. m_cols < 254 * 16 so that block index can be represented by uint8. -void SparseMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const uint8_t* __restrict__ ledger, - const int m_rows, const int m_cols, const int8_t* __restrict__ vectors, - const float* __restrict__ scaling_factors, int n_batch, - float* __restrict__ result); - -// Same as the above 8, 8, 8 integer matmul except for the presence of zero -// point and non-accumulative. -// TODO(b/148688698): remove this function by folding zero point calculation in -// prepare() function. 
-void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint, - const int8_t* input_to_gate_weights, - int32_t input_to_gate_effective_scale_a, - int32_t input_to_gate_effective_scale_b, - int32_t n_batch, int32_t n_input, int32_t n_cell, - int8_t* gate_output, int8_t gate_output_zp); - -// Same as above but has 16 bit and 8 bit input and 8 bit output. -// Used in projection when hidden is 16bit. -void MatrixBatchVectorMultiply(const int16_t* hidden, - const int8_t* hidden_to_output_weights, - int32_t proj_effective_scale_a, - int32_t proj_effective_scale_b, - const int32_t* gate_bias, int32_t n_batch, - int32_t n_hidden, int32_t n_output, - int32_t output_zp, int8_t* proj_output); - -// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized -// vector. -// Parameters: -// - input: batch vector of size n_batch * n_input; 16 bit. -// - layer_norm_weights: the quantized layer normalization weights. -// - bias: the bias for the layer normalization. -// - layer_norm_scale_a: multiplier for scale factor. -// - layer_norm_scale_b: shift for scale factor. -// - variance_limit: the guard to make sure the inverse does not overflow. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 16 bit output -void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights, - const int32_t* bias, int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, int32_t variance_limit, - int n_batch, int n_input, int16_t* output); - -// Same as above but the internal calculation is done in float. -void ApplyLayerNormFloat(const int16_t* input, - const int16_t* layer_norm_weights, - int32_t layer_norm_scale_a, int32_t layer_norm_scale_b, - const int32_t* bias, int n_batch, int n_input, - int16_t* output); - -// Apply Sigmoid to a quantized vector. -// Parameters: -// - input: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. 
-// - n_input: the size for input and output. -// - output: the 16 bit output -// The input is in Q3.12 format and the output is in Q0.15 format. -void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input, - int16_t* output); - -// Same as above but the internal calcualtion is float. -void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input, - int16_t* output); - -// Apply Tanh to a quantized vector. -// Parameters: -// - integer_bits: the integer bits of the input. -// Currently supports 0, 1, 2, 3, 4, 5, 6. -// - input: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 16 bit output -// The input is in Qm.15-m format and the output is in Q0.15 format. -void ApplyTanh(int32_t intger_bits, const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output); - -// Apply Tanh to a quantized vector. Tbe internal calculation is in float. -// - Input has 2^(integer_bits) as scale. -// - Output has Q0.15 as scale. -void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input, - int32_t integer_bits, int16_t* output); - -// Element-wise multiplication of two quantized vectors. -// Parameters: -// - input_1: batch vector of size n_batch * n_input; 16 bit. -// - input_2: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - shift: the shift needed to produce the output. -// - output: the 16 bit output of size n_batch * n_input. -// Output does not need to be initialized. -void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch, - int n_input, int shift, int16_t* output); - -// Element-wise multiplication of two quantized vectors. -// Parameters: -// - input_1: batch vector of size n_batch * n_input; 16 bit. -// - input_2: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. 
-// - n_input: the size for input and output. -// - shift: the shift needed to produce the output. -// - output: the 8 bit output of size n_batch * n_input. -// Output does not need to be initialized. -void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch, - int n_input, int shift, int8_t* output); - -// Element-wise multiplication of two quantized vectors with rescaling. -// Parameters: -// - input_1: batch vector of size n_batch * n_input; 16 bit. -// - input_2: batch vector of size n_batch * n_input; 16 bit. -// - multiplier: the multiplier part of scale. -// - shift: the shift part of scale. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 8 bit output of size n_batch * n_input. -// - output_zp: the zero point of output. -// Output does not need to be initialized. -// Multiplier ("m") and shift ("s") are connected to scale ("s") with s = m * -// 2^(s - 31). -void CwiseMul(const int16_t* input_1, const int16_t* input_2, - int32_t multiplier, int32_t shift, int32_t n_batch, - int32_t n_input, int32_t output_zp, int8_t* output); - -// Element-wise saturating addition of two quantized vectors without rescaling. -// Parameters: -// - input_1: batch vector of size n_batch * n_input; 16 bit. -// - input_2: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 8 bit output of size n_batch * n_input. -// Output does not need to be initialized. -void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, - int n_input, int16_t* output); - -// Element-wise in-place clipping of a vector. Overloaded for float, int16_t, -// int8_t. Parameters: -// - vector: vector of size v_size. -// - v_size: the size of the vector. -// - clipping_value: the value used for clipping. 
-void CwiseClipping(float* vector, const int v_size, const float clipping_value); -void CwiseClipping(int16_t* vector, const int v_size, - const int16_t clipping_value); -void CwiseClipping(int8_t* vector, const int v_size, - const int8_t clipping_value); - -// Dot product of two vectors. -float VectorVectorDotProduct(const float* vector1, const float* vector2, - int v_size); - -// Dot product of two batch vectors of size n_batch * v_size: -// vector1 = [x_1_1, x_1_2, ..., x_1_vsize, -// x_2_1, x_2_2, ..., x_2_vsize, -// ... -// x_nbatch_1,..., x_nbatch_vsize] -// vector2 = [y_1_1, y_1_2, ..., y_1_vsize, -// y_2_1, y_2_2, ..., y_2_vsize, -// ... -// y_nbatch_1,..., y_nbatch_vsize] -// Then result will be a vector of n_batch size starting from 'result': -// [x_1_1 * y_1_1 + x_1_2 * y_1_2 + ... + x_1_vsize * y_1_vsize, -// x_2_1 * y_2_1 + x_2_2 * y_2_2 + ... + x_2_vsize * y_2_vsize, -// ... -// x_nbatch_1 * y_nbatch_1 + ... + x_nbatch_vsize * y_nbatch_vsize] -template -inline void BatchVectorBatchVectorDotProduct(const T* vector1, const T* vector2, - int v_size, int n_batch, - T* result) { - for (int b = 0; b < n_batch; b++) { - result[b] = VectorVectorDotProduct(vector1, vector2, v_size); - vector1 += v_size; - vector2 += v_size; - } -} - -// Same as above but input is 16bit and output is 32bit. -void BatchVectorBatchVectorDotProduct(const int16_t* vector1, - const int16_t* vector2, int v_size, - int n_batch, int32_t* result); - -// Same as above, but inputs are 16bit integer and output is 16bit integer. -void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size, - const int16_t* batch_vector, - int n_batch, int32_t multiplier, - int shift, int16_t* result); - -// Compute "1.0f - elements of vector" (used in CIFG). -void Sub1Vector(const float* vector, int v_size, float* result); - -// Compute "1.0f - elements of vector" (used in CIFG) for int16 input. -// "vector" has range [0, 32767] because it is the output of sigmoid function. 
-void Sub1Vector(const int16_t* vector, int v_size, int16_t* result); - -// Multiply all elements of vector with a scalar. -void VectorScalarMultiply(const int8_t* vector, int v_size, float scale, - float* result); - -// Reduce-sum on a float input vector: -// input_vector: float pointer to input vector. -// output_vector: float pointer to vector. -// output_size: output vector size. -// reduction_size: number of consecutive elements from input vector which are -// added to get one element of output. -void ReductionSumVector(const float* input_vector, float* output_vector, - int output_size, int reduction_size); - -// Same as above but input/output is 32 bit integer. -void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector, - int output_size, int reduction_size); - -// Same as above but input is 8 bit integer. -void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector, - int output_size, int reduction_size); - -// Layer norm for each batch. -void MeanStddevNormalization(const float* input_vector, float* output_vector, - int v_size, int n_batch); - -// Saturate Add with rescale on both inputs. -void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, - const int8_t* recurrent, int8_t recurrent_zp, - int32_t input_effective_scale_a, - int32_t input_effective_scale_b, - int32_t recurrent_effective_scale_a, - int32_t recurrent_effective_scale_b, int32_t n_batch, - int32_t n_cell, int16_t* output); - -} // namespace tensor_utils - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.cc b/code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.cc deleted file mode 100644 index 62045d67..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.cc +++ /dev/null @@ -1,416 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/quantization_util.h" - -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" - -namespace tflite { - -namespace { -// These constants are used to manipulate the binary representation of doubles. -// Double-precision binary64 floating point format is: -// Bit | 63 | 62-52 | 51-0 | -// | Sign | Exponent | Fraction | -// To avoid 64-bit integers as much as possible, I break this into high and -// low 32-bit chunks. High is: -// Bit | 31 | 30-20 | 19-0 | -// | Sign | Exponent | High Fraction | -// Low is: -// Bit | 31-0 | -// | Low Fraction | -// We then access the components through logical bit-wise operations to -// extract the parts needed, with the positions and masks derived from the -// layout shown above. 
-constexpr uint64_t kSignMask = 0x8000000000000000LL; -constexpr uint64_t kExponentMask = 0x7ff0000000000000LL; -constexpr int32_t kExponentShift = 52; -constexpr int32_t kExponentBias = 1023; -constexpr uint32_t kExponentIsBadNum = 0x7ff; -constexpr uint64_t kFractionMask = 0x000fffffffc00000LL; -constexpr uint32_t kFractionShift = 22; -constexpr uint32_t kFractionRoundingMask = 0x003fffff; -constexpr uint32_t kFractionRoundingThreshold = 0x00200000; -} // namespace - -void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, - int* shift) { -#if TFLITE_SINGLE_ROUNDING - // Single-rounding MultiplyByQuantizedMultiplier only supports positive - // multipliers. - // TFLITE_DCHECK(double_multiplier >= 0); -#endif - if (double_multiplier == 0.) { - *quantized_multiplier = 0; - *shift = 0; - return; - } -#ifdef TFLITE_EMULATE_FLOAT - // If we're trying to avoid the use of floating-point instructions (for - // example on microcontrollers) then use an alternative implementation - // that only requires integer and bitwise operations. To enable this, you - // need to set the define during the build process for your platform. - int64_t q_fixed = IntegerFrExp(double_multiplier, shift); -#else // TFLITE_EMULATE_FLOAT - const double q = std::frexp(double_multiplier, shift); - auto q_fixed = static_cast(TfLiteRound(q * (1LL << 31))); -#endif // TFLITE_EMULATE_FLOAT - TFLITE_CHECK(q_fixed <= (1LL << 31)); - if (q_fixed == (1LL << 31)) { - q_fixed /= 2; - ++*shift; - } - TFLITE_CHECK_LE(q_fixed, std::numeric_limits::max()); - // A shift amount smaller than -31 would cause all bits to be shifted out - // and thus all results would be zero. We implement that instead with - // q_fixed==0, so as to avoid hitting issues with right-shift - // operations with shift amounts greater than 31. Note that this happens - // roughly when abs(double_multiplier) < 2^-31 and the present handling means - // that we're effectively flushing tiny double_multiplier's to zero. 
- // We could conceivably handle values in the range (roughly) [32, 63] - // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view - // the present handling is just doing 'flush denormals to zero'. We could - // reconsider and actually generate nonzero denormals if a need arises. - if (*shift < -31) { - *shift = 0; - q_fixed = 0; - } -#if TFLITE_SINGLE_ROUNDING - // Single-rounding MultiplyByQuantizedMultiplier doesn't support a shift > 30, - // saturate it. - if (*shift > 30) { - *shift = 30; - q_fixed = (1LL << 31) - 1; - } -#endif - *quantized_multiplier = static_cast(q_fixed); -} - -void QuantizeMultiplierGreaterThanOne(double double_multiplier, - int32_t* quantized_multiplier, - int* left_shift) { - TFLITE_CHECK_GT(double_multiplier, 1.); - QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift); - TFLITE_CHECK_GE(*left_shift, 0); -} - -void QuantizeMultiplierSmallerThanOneExp(double double_multiplier, - int32_t* quantized_multiplier, - int* left_shift) { - TFLITE_CHECK_LT(double_multiplier, 1.); - TFLITE_CHECK_GT(double_multiplier, 0.); - int shift; - QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift); - TFLITE_CHECK_LE(shift, 0); - *left_shift = shift; -} - -int64_t IntegerFrExp(double input, int* shift) { - // Make sure our assumptions about the double layout hold. - TFLITE_CHECK_EQ(8, sizeof(double)); - - // We want to access the bits of the input double value directly, which is - // tricky to do safely, so use a union to handle the casting. - union { - double double_value; - uint64_t double_as_uint; - } cast_union; - cast_union.double_value = input; - const uint64_t u = cast_union.double_as_uint; - - // If the bitfield is all zeros apart from the sign bit, this is a normalized - // zero value, so return standard values for this special case. 
- if ((u & ~kSignMask) == 0) { - *shift = 0; - return 0; - } - - // Deal with NaNs and Infs, which are always indicated with a fixed pattern in - // the exponent, and distinguished by whether the fractions are zero or - // non-zero. - const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift); - if (exponent_part == kExponentIsBadNum) { - *shift = std::numeric_limits::max(); - if (u & kFractionMask) { - // NaN, so just return zero (with the exponent set to INT_MAX). - return 0; - } else { - // Infinity, so return +/- INT_MAX. - if (u & kSignMask) { - return std::numeric_limits::min(); - } else { - return std::numeric_limits::max(); - } - } - } - - // The shift is fairly easy to extract from the high bits of the double value, - // just by masking it out and applying a bias. The std::frexp() implementation - // always returns values between 0.5 and 1.0 though, whereas the exponent - // assumes 1.0 to 2.0 is the standard range, so I add on one to match that - // interface. - *shift = (exponent_part - kExponentBias) + 1; - - // There's an implicit high bit in the double format definition, so make sure - // we include that at the top, and then reconstruct the rest of the fractional - // value from the remaining fragments. - int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift); - - // We're cutting off some bits at the bottom, so to exactly match the standard - // frexp implementation here we'll apply rounding by adding one to the least - // significant bit of the result if the discarded portion is over half of the - // maximum. - if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) { - fraction += 1; - } - // Negate the fraction if the sign bit was set. - if (u & kSignMask) { - fraction *= -1; - } - - return fraction; -} - -double DoubleFromFractionAndShift(int64_t fraction, int shift) { - union { - double double_value; - uint64_t double_as_uint; - } result; - - // Detect NaNs and infinities. 
- if (shift == std::numeric_limits::max()) { - if (fraction == 0) { - return std::numeric_limits::quiet_NaN(); - } else if (fraction > 0) { - return std::numeric_limits::infinity(); - } else { - return -std::numeric_limits::infinity(); - } - } - - // Return a normalized zero for a zero fraction. - if (fraction == 0) { - result.double_as_uint = 0; - return result.double_value; - } - - bool is_negative = (fraction < 0); - int64_t encoded_fraction = is_negative ? -fraction : fraction; - int64_t encoded_shift = (shift - 1); - while (encoded_fraction < 0x40000000) { - encoded_fraction *= 2; - encoded_shift -= 1; - } - while (encoded_fraction > 0x80000000) { - encoded_fraction /= 2; - encoded_shift += 1; - } - encoded_fraction -= 0x40000000; - if (encoded_shift < -1022) { - encoded_shift = -1023; - } else if (encoded_shift > 1022) { - encoded_shift = 1023; - } - encoded_shift += kExponentBias; - uint64_t encoded_sign = is_negative ? kSignMask : 0; - result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) | - (encoded_fraction << kFractionShift); - return result.double_value; -} - -double IntegerDoubleMultiply(double a, double b) { - int a_shift; - const int64_t a_fraction = IntegerFrExp(a, &a_shift); - int b_shift; - const int64_t b_fraction = IntegerFrExp(b, &b_shift); - // Detect NaNs and infinities. - if (a_shift == std::numeric_limits::max() || - (b_shift == std::numeric_limits::max())) { - return std::numeric_limits::quiet_NaN(); - } - const int result_shift = a_shift + b_shift + 1; - const int64_t result_fraction = (a_fraction * b_fraction) >> 32; - return DoubleFromFractionAndShift(result_fraction, result_shift); -} - -int IntegerDoubleCompare(double a, double b) { - int a_shift; - const int64_t a_fraction = IntegerFrExp(a, &a_shift); - int b_shift; - const int64_t b_fraction = IntegerFrExp(b, &b_shift); - - // Detect NaNs and infinities. 
- if (a_shift == std::numeric_limits::max() || - (b_shift == std::numeric_limits::max())) { - return 1; - } - - if ((a_fraction == 0) && (b_fraction < 0)) { - return 1; - } else if ((a_fraction < 0) && (b_fraction == 0)) { - return -1; - } else if (a_shift < b_shift) { - return -1; - } else if (a_shift > b_shift) { - return 1; - } else if (a_fraction < b_fraction) { - return -1; - } else if (a_fraction > b_fraction) { - return 1; - } else { - return 0; - } -} - -void PreprocessSoftmaxScaling(double beta, double input_scale, - int input_integer_bits, - int32_t* quantized_multiplier, int* left_shift) { - // If the overall multiplier (input and beta) is large, then exp() of an - // input difference of 1 scaled by this will be large. In other words, we - // can cap the multiplier and know that, when it is used, the output will be - // (round to) zero wherever the input is not at the maximum value. - - // If the overall scale is less than one, and input_integer_bits=0, then the - // result is double equivalent of Q0.31 (actually with more precision). Thus - // this generates a Q(input_integer_bits).(31-input_integer_bits) - // representation. 
-#if TFLITE_SINGLE_ROUNDING - const double max_real_multiplier = (1LL << 30) - 1.0; -#else - const double max_real_multiplier = (1LL << 31) - 1.0; -#endif - -#ifdef TFLITE_EMULATE_FLOAT - const double input_beta = IntegerDoubleMultiply(beta, input_scale); - int shift; - int64_t fraction = IntegerFrExp(input_beta, &shift); - shift += (31 - input_integer_bits); - double input_beta_real_multiplier = - DoubleFromFractionAndShift(fraction, shift); - if (IntegerDoubleCompare(input_beta_real_multiplier, max_real_multiplier) > - 0) { - input_beta_real_multiplier = max_real_multiplier; - } -#else // TFLITE_EMULATE_FLOAT - const double input_beta_real_multiplier = - std::min(beta * input_scale * (1 << (31 - input_integer_bits)), - max_real_multiplier); -#endif // TFLITE_EMULATE_FLOAT - - QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, - quantized_multiplier, left_shift); -} - -void PreprocessLogSoftmaxScalingExp(double beta, double input_scale, - int input_integer_bits, - int32_t* quantized_multiplier, - int* left_shift, - int32_t* reverse_scaling_divisor, - int* reverse_scaling_left_shift) { - PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits, - quantized_multiplier, left_shift); - - // Also calculate what amounts to the inverse scaling factor for the input. 
- const double real_reverse_scaling_divisor = - (1 << (31 - *left_shift)) / static_cast(*quantized_multiplier); - tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor, - reverse_scaling_divisor, - reverse_scaling_left_shift); -} - -int CalculateInputRadius(int input_integer_bits, int input_left_shift, - int total_signed_bits) { -#ifdef TFLITE_EMULATE_FLOAT - int64_t result = (1 << input_integer_bits) - 1; - result <<= (total_signed_bits - input_integer_bits); - result >>= input_left_shift; - return result; -#else // TFLITE_EMULATE_FLOAT - const double max_input_rescaled = - 1.0 * ((1 << input_integer_bits) - 1) * - (1LL << (total_signed_bits - input_integer_bits)) / - (1LL << input_left_shift); - // Tighten bound using floor. Suppose that we could use the exact value. - // After scaling the difference, the result would be at the maximum. Thus we - // must ensure that our value has lower magnitude. - return static_cast(std::floor(max_input_rescaled)); -#endif // TFLITE_EMULATE_FLOAT -} - -void NudgeQuantizationRange(const float min, const float max, - const int quant_min, const int quant_max, - float* nudged_min, float* nudged_max, - float* nudged_scale) { - // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h. 
- const float quant_min_float = static_cast(quant_min); - const float quant_max_float = static_cast(quant_max); - *nudged_scale = (max - min) / (quant_max_float - quant_min_float); - const float zero_point_from_min = quant_min_float - min / *nudged_scale; - uint16_t nudged_zero_point; - if (zero_point_from_min < quant_min_float) { - nudged_zero_point = static_cast(quant_min); - } else if (zero_point_from_min > quant_max_float) { - nudged_zero_point = static_cast(quant_max); - } else { - nudged_zero_point = static_cast(TfLiteRound(zero_point_from_min)); - } - *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale); - *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale); -} - -void FakeQuantizeArray(const float nudged_scale, const float nudged_min, - const float nudged_max, const float* input_data, - float* output_data, const float size) { - // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h. - const float inv_nudged_scale = 1.0f / nudged_scale; - - for (int i = 0; i < size; i++) { - const float src_val = input_data[i]; - const float clamped = std::min(nudged_max, std::max(nudged_min, src_val)); - const float clamped_shifted = clamped - nudged_min; - const float dst_val = - TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale + - nudged_min; - output_data[i] = dst_val; - } -} - -bool CheckedLog2(const float x, int* log2_result) { - // Using TfLiteRound instead of std::round and std::log instead of - // std::log2 to work around these functions being missing in a toolchain - // used in some TensorFlow tests as of May 2018. 
- const float x_log2 = std::log(x) * (1.0f / std::log(2.0f)); - const float x_log2_rounded = TfLiteRound(x_log2); - const float x_log2_fracpart = x_log2 - x_log2_rounded; - - *log2_result = static_cast(x_log2_rounded); - return std::abs(x_log2_fracpart) < 1e-3f; -} - -void QuantizeMultiplierArray(const double* effective_scales, size_t size, - int32_t* effective_scale_significand, - int* effective_shift) { - for (size_t i = 0; i < size; ++i) { - QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i], - &effective_shift[i]); - } -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.h deleted file mode 100644 index 0ee914b0..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/quantization_util.h +++ /dev/null @@ -1,292 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ - -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -// Given the min and max values of a float array, return -// reasonable quantization parameters to use for this array. -template -QuantizationParams ChooseQuantizationParams(double rmin, double rmax, - bool narrow_range) { - const T qmin = std::numeric_limits::min() + (narrow_range ? 1 : 0); - const T qmax = std::numeric_limits::max(); - const double qmin_double = qmin; - const double qmax_double = qmax; - // 0 should always be a representable value. Let's assume that the initial - // min,max range contains 0. - TFLITE_CHECK_LE(rmin, 0.); - TFLITE_CHECK_GE(rmax, 0.); - if (rmin == rmax) { - // Special case where the min,max range is a point. Should be {0}. - TFLITE_CHECK_EQ(rmin, 0.); - TFLITE_CHECK_EQ(rmax, 0.); - QuantizationParams quantization_params; - quantization_params.zero_point = 0; - quantization_params.scale = 0.; - return quantization_params; - } - - // General case. - // - // First determine the scale. - const double scale = (rmax - rmin) / (qmax_double - qmin_double); - - // Zero-point computation. - // First the initial floating-point computation. The zero-point can be - // determined from solving an affine equation for any known pair - // (real value, corresponding quantized value). - // We know two such pairs: (rmin, qmin) and (rmax, qmax). - // The arithmetic error on the zero point computed from either pair - // will be roughly machine_epsilon * (sum of absolute values of terms) - // so we want to use the variant that adds the smaller terms. 
- const double zero_point_from_min = qmin_double - rmin / scale; - const double zero_point_from_max = qmax_double - rmax / scale; - const double zero_point_from_min_error = - std::abs(qmin_double) + std::abs(rmin / scale); - const double zero_point_from_max_error = - std::abs(qmax_double) + std::abs(rmax / scale); - - const double zero_point_double = - zero_point_from_min_error < zero_point_from_max_error - ? zero_point_from_min - : zero_point_from_max; - - // Now we need to nudge the zero point to be an integer - // (our zero points are integer, and this is motivated by the requirement - // to be able to represent the real value "0" exactly as a quantized value, - // which is required in multiple places, for example in Im2col with SAME - // padding). - T nudged_zero_point = 0; - if (zero_point_double < qmin_double) { - nudged_zero_point = qmin; - } else if (zero_point_double > qmax_double) { - nudged_zero_point = qmax; - } else { - nudged_zero_point = static_cast(round(zero_point_double)); - } - // The zero point should always be in the range of quantized value, - // [qmin, qmax]. - TFLITE_CHECK_GE(nudged_zero_point, qmin); - TFLITE_CHECK_LE(nudged_zero_point, qmax); - - // Finally, store the result nudged quantization params. - QuantizationParams quantization_params; - quantization_params.zero_point = nudged_zero_point; - quantization_params.scale = scale; - return quantization_params; -} - -template -QuantizationParams ChooseQuantizationParams(double rmin, double rmax) { - return ChooseQuantizationParams(rmin, rmax, false); -} - -// Converts a floating-point number to an integer. For all inputs x where -// static_cast(x) is legal according to the C++ standard, the result -// is identical to that cast (i.e. the result is x with its fractional part -// truncated whenever that is representable as IntOut). -// -// static_cast would cause undefined behavior for the following cases, which -// have well-defined behavior for this function: -// -// 1. 
If x is NaN, the result is zero. -// -// 2. If the truncated form of x is above the representable range of IntOut, -// the result is std::numeric_limits::max(). -// -// 3. If the truncated form of x is below the representable range of IntOut, -// the result is std::numeric_limits::min(). -// -// Note that cases #2 and #3 cover infinities as well as finite numbers. -// -// The range of FloatIn must include the range of IntOut, otherwise -// the results are undefined. -// TODO(sfeuz): Replace by absl::SafeCast once available. -template -IntOut SafeCast(FloatIn x) { - static_assert(!std::numeric_limits::is_integer, - "FloatIn is integer"); - static_assert(std::numeric_limits::is_integer, - "IntOut is not integer"); - static_assert(std::numeric_limits::radix == 2, "IntOut is base 2"); - - // Special case NaN, for which the logic below doesn't work. - if (std::isnan(x)) { - return 0; - } - - // Negative values all clip to zero for unsigned results. - if (!std::numeric_limits::is_signed && x < 0) { - return 0; - } - - // Handle infinities. - if (std::isinf(x)) { - return x < 0 ? std::numeric_limits::min() - : std::numeric_limits::max(); - } - - // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0), - // unless x is zero in which case exp == 0. Note that this implies that the - // magnitude of x is strictly less than 2^exp. - int exp = 0; - std::frexp(x, &exp); - - // Let N be the number of non-sign bits in the representation of IntOut. If - // the magnitude of x is strictly less than 2^N, the truncated version of x - // is representable as IntOut. The only representable integer for which this - // is not the case is kMin for signed types (i.e. -2^N), but that is covered - // by the fall-through below. - if (exp <= std::numeric_limits::digits) { - return x; - } - - // Handle numbers with magnitude >= 2^N. - return x < 0 ? 
std::numeric_limits::min() - : std::numeric_limits::max(); -} - -// Decompose a double multiplier into a Q0.31 int32 representation of its -// significand, and shift representation of NEGATIVE its exponent --- -// this is intended as a RIGHT-shift. -// -// Restricted to the case where the multiplier < 1 (and non-negative). -void QuantizeMultiplierSmallerThanOneExp(double double_multiplier, - int32_t* quantized_multiplier, - int* left_shift); - -// Decompose a double multiplier into a Q0.31 int32 representation of its -// significand, and shift representation of its exponent. -// -// Restricted to the case where the multiplier > 1. -void QuantizeMultiplierGreaterThanOne(double double_multiplier, - int32_t* quantized_multiplier, - int* left_shift); - -// Decompose a double multiplier into a Q0.31 int32 representation of its -// significand, and shift representation of its exponent. -// -// Handles an arbitrary positive multiplier. The 'shift' output-value is -// basically the 'floating-point exponent' of the multiplier: -// Negative for a right-shift (when the multiplier is <1), positive for a -// left-shift (when the multiplier is >1) -void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, - int* shift); - -// Splits a double input value into a returned fraction, and a shift value from -// the exponent, using only bitwise and integer operations to support -// microcontrollers and other environments without floating-point support. -// -// This is designed to be a replacement for how std::frexp() is used within the -// QuantizeMultiplier() function, and so has a different signature than the -// standard version, returning a 64-bit integer rather than a double. This -// result has a maximum value of 1<<31, with the fraction expressed as a -// proportion of that maximum. 
-// -// std::frexp() returns NaNs and infinities unmodified, but since we're -// returning integers that can't represent those values, instead we return -// a shift of std::numeric_limits::max() for all bad numbers, with an int64 -// result of 0 for NaNs, std:numeric_limits::max() for +INFINITY, and -// std::numeric_limits::min() for -INFINITY. Denormalized inputs will -// result in return values that end up truncating some bits at the end, -// reflecting the loss of precision inherent in denormalization. -int64_t IntegerFrExp(double input, int* shift); - -// Converts an integer fraction in the format produced by IntegerFrExp (where -// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an -// IEEE binary64 double format result. The implementation uses only integer and -// bitwise operators, so no floating point hardware support or emulation is -// needed. This is here so quantized operations can run non-time-critical -// preparation calculations on microcontrollers and other platforms without -// float support. -double DoubleFromFractionAndShift(int64_t fraction, int shift); - -// Performs a multiplication of two numbers in double format, using only integer -// and bitwise instructions. This is aimed at supporting housekeeping functions -// for quantized operations on microcontrollers without floating-point hardware. -double IntegerDoubleMultiply(double a, double b); - -// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is -// greater than b. It is implemented using only integer and logical instructions -// so that it can be easily run on microcontrollers for quantized operations. -int IntegerDoubleCompare(double a, double b); - -// This first creates a multiplier in a double equivalent of -// Q(input_integer_bits).(31-input_integer_bits) representation, with extra -// precision in the double's fractional bits. It then splits the result into -// significand and exponent. 
-void PreprocessSoftmaxScaling(double beta, double input_scale, - int input_integer_bits, - int32_t* quantized_multiplier, int* left_shift); -// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated. -void PreprocessLogSoftmaxScalingExp(double beta, double input_scale, - int input_integer_bits, - int32_t* quantized_multiplier, - int* left_shift, - int32_t* reverse_scaling_divisor, - int* reverse_scaling_left_shift); -// Calculate the largest input that will result in a within-bounds intermediate -// result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words, -// it must not overflow before we reduce the value by multiplication by the -// input multiplier. The negative radius is used as the minimum difference in -// Softmax. -int CalculateInputRadius(int input_integer_bits, int input_left_shift, - int total_signed_bits = 31); - -// Nudges a min/max quantization range to ensure zero is zero. -// Gymnastics with nudged zero point is to ensure that real zero maps to -// an integer, which is required for e.g. zero-padding in convolutional layers. -// Outputs nudged_min, nudged_max, nudged_scale. -void NudgeQuantizationRange(const float min, const float max, - const int quant_min, const int quant_max, - float* nudged_min, float* nudged_max, - float* nudged_scale); - -// Fake quantizes (quantizes and dequantizes) input_data using the scale, -// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code -// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor. -void FakeQuantizeArray(const float nudged_scale, const float nudged_min, - const float nudged_max, const float* input_data, - float* output_data, const float size); - -// If x is approximately a power of two (with any positive or negative -// exponent), stores that exponent (i.e. log2(x)) in *log2_result, otherwise -// returns false. 
-bool CheckedLog2(const float x, int* log2_result); - -// Decomposes an array of double multipliers into a Q0.31 int32 representation -// of its significand, and shift representation of its exponent. -// -// Handles an arbitrary multiplier. The 'shift' output-value is -// basically the 'floating-point exponent' of the multiplier: -// Negative for a right-shift (when the multiplier is <1), positive for a -// left-shift (when the multiplier is >1) -void QuantizeMultiplierArray(const double* effective_scales, size_t size, - int32_t* effective_scale_significand, - int* effective_shift); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add.h deleted file mode 100644 index 1f521316..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add.h +++ /dev/null @@ -1,400 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_ - -#include -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { - -namespace reference_ops { - -template -inline void Add(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - T activation_min, activation_max; - GetActivationParams(params, &activation_min, &activation_max); - - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = ActivationFunctionWithMinMax( - input1_data[i] + input2_data[i], activation_min, activation_max); - } -} - -// Element-wise add that can often be used for inner loop of broadcast add as -// well as the non-broadcast add. - -// This function is used for 8-bit as well as for 16-bit, but the accumulator -// is 32-bit for both cases. The overflow does not happen due to the -// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
-template -inline void AddElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { - TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits::max()); - TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits::max()); - TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits::max()); - TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits::max()); - - for (int i = 0; i < size; ++i) { - const int32_t input1_val = params.input1_offset + input1_data[i]; - const int32_t input2_val = params.input2_offset + input2_data[i]; - const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); - const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32_t raw_sum = scaled_input1_val + scaled_input2_val; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sum, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); - } -} - -// Scalar-broadcast add that can be used for inner loop of more general -// broadcast add, so that, for example, scalar-broadcast with batch will still -// be fast. 
-inline void AddScalarBroadcast(int size, const ArithmeticParams& params, - uint8_t input1_data, const uint8_t* input2_data, - uint8_t* output_data) { - TFLITE_DCHECK_GT(params.input1_offset, -256); - TFLITE_DCHECK_GT(params.input2_offset, -256); - TFLITE_DCHECK_LT(params.input1_offset, 256); - TFLITE_DCHECK_LT(params.input2_offset, 256); - - const int32_t input1_val = params.input1_offset + input1_data; - const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); - for (int i = 0; i < size; ++i) { - const int32_t input2_val = params.input2_offset + input2_data[i]; - const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32_t raw_sum = scaled_input1_val + scaled_input2_val; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sum, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); - } -} - -inline void Add(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const uint8_t* input1_data, - const RuntimeShape& input2_shape, const uint8_t* input2_data, - const RuntimeShape& output_shape, uint8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - TFLITE_DCHECK_GT(params.input1_offset, -256); - TFLITE_DCHECK_GT(params.input2_offset, -256); - TFLITE_DCHECK_LT(params.input1_offset, 256); - TFLITE_DCHECK_LT(params.input2_offset, 
256); - AddElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -inline void AddGeneralParamScale(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int16_t* input1_data, - const RuntimeShape& input2_shape, - const int16_t* input2_data, - const RuntimeShape& output_shape, - int16_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - int max_value = std::numeric_limits::max(); - - TFLITE_DCHECK_GT(params.input1_offset, -max_value); - TFLITE_DCHECK_GT(params.input2_offset, -max_value); - TFLITE_DCHECK_LT(params.input1_offset, max_value); - TFLITE_DCHECK_LT(params.input2_offset, max_value); - AddElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -inline void Add(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const int16_t* input1_data, - const RuntimeShape& input2_shape, const int16_t* input2_data, - const RuntimeShape& output_shape, int16_t* output_data, - bool pot_scale = true) { - if (!pot_scale) { - AddGeneralParamScale(params, input1_shape, input1_data, input2_shape, - input2_data, output_shape, output_data); - return; - } - - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - - const int input1_shift = params.input1_shift; - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - const int16_t output_activation_min = params.quantized_activation_min; - const int16_t output_activation_max = params.quantized_activation_max; - - TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0); - TFLITE_DCHECK_LE(input1_shift, 0); - TFLITE_DCHECK_LE(params.input2_shift, 0); - const int16_t* not_shift_input = - input1_shift == 0 ? input1_data : input2_data; - const int16_t* shift_input = input1_shift == 0 ? 
input2_data : input1_data; - const int input_right_shift = - input1_shift == 0 ? -params.input2_shift : -input1_shift; - - for (int i = 0; i < flat_size; i++) { - // F0 uses 0 integer bits, range [-1, 1]. - using F0 = gemmlowp::FixedPoint; - - F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); - F0 scaled_input = F0::FromRaw( - gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); - F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled); - const int16_t raw_output = result.raw(); - const int16_t clamped_output = std::min( - output_activation_max, std::max(output_activation_min, raw_output)); - output_data[i] = clamped_output; - } -} - -template -inline typename std::enable_if::value, void>::type -BroadcastAdd4DSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); - - T activation_min, activation_max; - GetActivationParams(params, &activation_min, &activation_max); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - output_data[Offset(extended_output_shape, b, y, x, c)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, b, y, x, c)] + - input2_data[SubscriptToIndex(desc2, b, y, x, c)], - activation_min, activation_max); - } - } - } - } -} - -// This function is used for 8-bit as well as for 16-bit, but the accumulator -// is 32-bit for both cases. The overflow does not happen due to the -// choice of the shift (20 or 15, accordingly - see add.cc for more comments). -template -inline typename std::enable_if::value, void>::type -BroadcastAdd4DSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - const int32_t input1_val = - params.input1_offset + - input1_data[SubscriptToIndex(desc1, b, y, x, c)]; - const int32_t input2_val = - params.input2_offset + - input2_data[SubscriptToIndex(desc2, b, y, x, c)]; - const int32_t shifted_input1_val = - input1_val * (1 << params.left_shift); - const int32_t shifted_input2_val = - input2_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, - params.input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, - params.input2_shift); - const int32_t raw_sum = scaled_input1_val + scaled_input2_val; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sum, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast(clamped_output); - } - } - } - } -} - -inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, - const RuntimeShape& unswitched_input1_shape, - const uint8_t* unswitched_input1_data, - const RuntimeShape& unswitched_input2_shape, - const uint8_t* unswitched_input2_data, - const RuntimeShape& output_shape, - uint8_t* output_data) { - ArithmeticParams switched_params = unswitched_params; - switched_params.input1_offset = unswitched_params.input2_offset; - switched_params.input1_multiplier = unswitched_params.input2_multiplier; - switched_params.input1_shift = unswitched_params.input2_shift; - 
switched_params.input2_offset = unswitched_params.input1_offset; - switched_params.input2_multiplier = unswitched_params.input1_multiplier; - switched_params.input2_shift = unswitched_params.input1_shift; - - const bool use_unswitched = - unswitched_params.broadcast_category == - tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast; - - const ArithmeticParams& params = - use_unswitched ? unswitched_params : switched_params; - const uint8_t* input1_data = - use_unswitched ? unswitched_input1_data : unswitched_input2_data; - const uint8_t* input2_data = - use_unswitched ? unswitched_input2_data : unswitched_input1_data; - - // Fivefold nested loops. The second input resets its position for each - // iteration of the second loop. The first input resets its position at the - // beginning of the fourth loop. The innermost loop is an elementwise add of - // sections of the arrays. - uint8_t* output_data_ptr = output_data; - const uint8_t* input1_data_ptr = input1_data; - const uint8_t* input2_data_reset = input2_data; - // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared - // between input shapes. y3 for input 1 is always broadcast, and so the - // dimension there is 1, whereas optionally y1 might be broadcast for input 2. - // Put another way, - // input1.shape.FlatSize = y0 * y1 * y2 * y4, - // input2.shape.FlatSize = y0 * y2 * y3 * y4. - int y0 = params.broadcast_shape[0]; - int y1 = params.broadcast_shape[1]; - int y2 = params.broadcast_shape[2]; - int y3 = params.broadcast_shape[3]; - int y4 = params.broadcast_shape[4]; - if (y4 > 1) { - // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner - // dimension. 
- for (int i0 = 0; i0 < y0; ++i0) { - const uint8_t* input2_data_ptr; - for (int i1 = 0; i1 < y1; ++i1) { - input2_data_ptr = input2_data_reset; - for (int i2 = 0; i2 < y2; ++i2) { - for (int i3 = 0; i3 < y3; ++i3) { - AddElementwise(y4, params, input1_data_ptr, input2_data_ptr, - output_data_ptr); - input2_data_ptr += y4; - output_data_ptr += y4; - } - // We have broadcast y4 of input1 data y3 times, and now move on. - input1_data_ptr += y4; - } - } - // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on. - input2_data_reset = input2_data_ptr; - } - } else { - // Special case of y4 == 1, in which the innermost loop is a single element - // and can be combined with the next (y3) as an inner broadcast. - // - // Note that this handles the case of pure scalar broadcast when - // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar - // broadcast with batch (as y2 > 1). - // - // NOTE The process is the same as the above general case except simplified - // for y4 == 1 and the loop over y3 is contained within the - // AddScalarBroadcast function. - for (int i0 = 0; i0 < y0; ++i0) { - const uint8_t* input2_data_ptr; - for (int i1 = 0; i1 < y1; ++i1) { - input2_data_ptr = input2_data_reset; - for (int i2 = 0; i2 < y2; ++i2) { - AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr, - output_data_ptr); - input2_data_ptr += y3; - output_data_ptr += y3; - input1_data_ptr += 1; - } - } - input2_data_reset = input2_data_ptr; - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add_n.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add_n.h deleted file mode 100644 index b6b5882d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/add_n.h +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_ops { - -// T is expected to be either float or int. -template -inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs, - const T* const* input_data, T* output_data) { - // All inputs and output should have the same shape, this is checked during - // Prepare stage. - const size_t size = input_shape.FlatSize(); - for (size_t i = 0; i < size; ++i) { - T x = 0; - for (size_t j = 0; j < num_inputs; ++j) { - x += input_data[j][i]; - } - output_data[i] = x; - } -} - -inline void AddN(const ArithmeticParams& params, - const RuntimeShape& input_shape, const size_t num_inputs, - const int8_t* const* input_data, int8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - // Input offset is negative input zero point. Activation tensors are - // asymmetric quantized so they span the full int8 range. - // All inputs should have same zero-point and scale, this is checked during - // Prepare stage. 
- TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits::min()); - TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits::max()); - - // All inputs and output should have the same shape, this is checked during - // Prepare stage. - const size_t size = input_shape.FlatSize(); - for (size_t i = 0; i < size; ++i) { - // accumulate in scaled_x before clamping to avoid overflow - const int32_t x = params.input1_offset; // x = 0 - const int32_t shifted_x = x * (1 << params.left_shift); - int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_x, params.input1_multiplier, params.input1_shift); - - for (size_t j = 0; j < num_inputs; ++j) { - const int32_t y = params.input1_offset + input_data[j][i]; - const int32_t shifted_y = y * (1 << params.left_shift); - int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_y, params.input1_multiplier, params.input1_shift); - scaled_x += scaled_y; - } - - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - scaled_x, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h deleted file mode 100644 index 8154fbf7..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/arg_min_max.h +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_ - -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -template -std::function GetComparefunction(bool is_arg_max) { - if (is_arg_max) { - return std::greater(); - } else { - return std::less(); - } -} - -template -void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data, - const T3* input2_data, const RuntimeShape& output_shape, - T2* output_data, const Cmp& cmp) { - TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0); - TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1, - output_shape.DimensionsCount()); - int axis = input2_data[0]; - if (axis < 0) { - axis += input1_shape.DimensionsCount(); - } - const int axis_size = input1_shape.Dims(axis); - - int outer_size = 1; - for (int i = 0; i < axis; ++i) { - TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i)); - outer_size *= input1_shape.Dims(i); - } - - int inner_size = 1; - const int dims_count = input1_shape.DimensionsCount(); - for (int i = axis + 1; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1)); - inner_size *= input1_shape.Dims(i); - } - for (int outer = 0; outer < outer_size; ++outer) { - for (int inner = 0; inner < inner_size; ++inner) { - auto min_max_value = input1_data[outer * axis_size * inner_size + inner]; - T2 min_max_index = 0; - for (int i = 1; i < axis_size; 
++i) { - const auto& curr_value = - input1_data[(outer * axis_size + i) * inner_size + inner]; - if (cmp(curr_value, min_max_value)) { - min_max_value = curr_value; - min_max_index = static_cast(i); - } - } - output_data[outer * inner_size + inner] = min_max_index; - } - } -} - -template -void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data, - const T3* input2_data, const RuntimeShape& output_shape, - T2* output_data, const bool is_arg_max) { - ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, - GetComparefunction(is_arg_max)); -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_matmul.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_matmul.h deleted file mode 100644 index 767ad6ab..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_matmul.h +++ /dev/null @@ -1,275 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { -namespace batch_matmul { - -// Determine which dimension is the broadcast dimension. -inline int broadcast_dim(int lhs_dim, int rhs_dim) { - if (lhs_dim == rhs_dim) return lhs_dim; - if (lhs_dim == 1) return rhs_dim; - TFLITE_DCHECK_EQ(rhs_dim, 1); - return lhs_dim; -} - -// Compute the "extent" for iterating on this dimension. -// If we are broadcasting, then don't advance (i.e return 0). -inline int extent(const RuntimeShape& shape, int x) { - if (shape.Dims(x) == 1) { - return 0; - } - int prod = 1; - for (int i = x + 1; i < shape.DimensionsCount(); ++i) { - prod *= shape.Dims(i); - } - return prod; -} - -} // namespace batch_matmul - -template -inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data, - const RuntimeShape& rhs_shape, const Tb* rhs_data, - const RuntimeShape& output_shape, Tout* output_data) { - const RuntimeShape extended_lhs_shape = - RuntimeShape::ExtendedShape(5, lhs_shape); - const RuntimeShape extended_rhs_shape = - RuntimeShape::ExtendedShape(5, rhs_shape); - - const int batch_dim0 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0)); - const int batch_dim1 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1)); - const int batch_dim2 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2)); - - const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0); - const int lhs_ext1 = 
batch_matmul::extent(extended_lhs_shape, 1); - const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2); - const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0); - const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1); - const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2); - - // Set params for each matrix multiply. - const int lhs_rows = extended_lhs_shape.Dims(3); - const int rhs_cols = extended_rhs_shape.Dims(4); - const int accum_depth = extended_lhs_shape.Dims(4); - - for (int b0 = 0; b0 < batch_dim0; ++b0) { - const Ta* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); - const Tb* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); - for (int b1 = 0; b1 < batch_dim1; ++b1) { - const Ta* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; - const Tb* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; - for (int b2 = 0; b2 < batch_dim2; ++b2) { - const Ta* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; - const Tb* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; - Tout* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + - b1 * batch_dim2 + b2) * - lhs_rows * rhs_cols; - for (int j = 0; j < rhs_cols; ++j) { - for (int i = 0; i < lhs_rows; ++i) { - Tout total = 0; - for (int k = 0; k < accum_depth; ++k) { - total += static_cast(lhs_ptr2[accum_depth * i + k]) * - static_cast(rhs_ptr2[j * accum_depth + k]); - } - int idx = lhs_rows * j + i; - out_ptr[idx] = total; - } - } - } - } - } -} - -inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data, - const RuntimeShape& rhs_shape, const int8_t* rhs_data, - const float* scaling_factors, - const int32_t* input_offset, int32_t* row_sums, - const RuntimeShape& output_shape, float* output_data, - bool* compute_row_sums) { - const RuntimeShape extended_lhs_shape = - RuntimeShape::ExtendedShape(5, lhs_shape); - const RuntimeShape extended_rhs_shape = - RuntimeShape::ExtendedShape(5, rhs_shape); - - const int batch_dim0 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0)); - const int 
batch_dim1 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1)); - const int batch_dim2 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2)); - - const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0); - const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1); - const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2); - const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0); - const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1); - const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2); - - // Set params for each matrix multiply. - const int lhs_rows = extended_lhs_shape.Dims(3); - const int rhs_cols = extended_rhs_shape.Dims(4); - const int accum_depth = extended_lhs_shape.Dims(4); - - const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols; - const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols; - const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols; - const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows; - const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows; - const int woff_ext2 = lhs_ext2 == 0 ? 
0 : lhs_rows; - - if (!compute_row_sums || *compute_row_sums) { - int num_weights_matrices = 1; - for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) { - num_weights_matrices *= extended_lhs_shape.Dims(i); - } - tensor_utils::ReductionSumVector( - lhs_data, row_sums, num_weights_matrices * lhs_rows, accum_depth); - if (compute_row_sums) { - *compute_row_sums = false; - } - } - - for (int b0 = 0; b0 < batch_dim0; ++b0) { - const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); - const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); - const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0); - const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0); - const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0); - for (int b1 = 0; b1 < batch_dim1; ++b1) { - const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; - const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; - const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1); - const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1); - const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1); - for (int b2 = 0; b2 < batch_dim2; ++b2) { - const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; - const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; - const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2); - const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2); - const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2); - float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + - b1 * batch_dim2 + b2) * - lhs_rows * rhs_cols; - for (int j = 0; j < rhs_cols; ++j) { - const float batch_scaling_factor = scale_ptr2[j]; - const float batch_offset = static_cast(ioff_ptr2[j]); - for (int i = 0; i < lhs_rows; ++i) { - int32_t total = 0; - for (int k = 0; k < accum_depth; ++k) { - total += - lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k]; - } - int32_t row_sum = woff_ptr2[i]; - total -= row_sum * batch_offset; - int idx = lhs_rows * j + i; - out_ptr[idx] += batch_scaling_factor * total; - } - } - } - } - } 
-} - -template -inline void BatchMatMul(const FullyConnectedParams& params, - const RuntimeShape& lhs_shape, const T* lhs_data, - const RuntimeShape& rhs_shape, const T* rhs_data, - const RuntimeShape& output_shape, T* output_data) { - const RuntimeShape extended_lhs_shape = - RuntimeShape::ExtendedShape(5, lhs_shape); - const RuntimeShape extended_rhs_shape = - RuntimeShape::ExtendedShape(5, rhs_shape); - - const int batch_dim0 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0)); - const int batch_dim1 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1)); - const int batch_dim2 = batch_matmul::broadcast_dim( - extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2)); - - const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0); - const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1); - const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2); - const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0); - const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1); - const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2); - - // Set params for each matrix multiply. 
- const int lhs_rows = extended_lhs_shape.Dims(3); - const int rhs_cols = extended_rhs_shape.Dims(4); - const int accum_depth = extended_lhs_shape.Dims(4); - - const int32_t input_offset = params.input_offset; - const int32_t filter_offset = params.weights_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - - for (int b0 = 0; b0 < batch_dim0; ++b0) { - const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); - const T* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); - for (int b1 = 0; b1 < batch_dim1; ++b1) { - const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; - const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; - for (int b2 = 0; b2 < batch_dim2; ++b2) { - const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; - const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; - T* out_ptr = output_data + - ((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) * - lhs_rows * rhs_cols; - - for (int j = 0; j < rhs_cols; ++j) { - for (int i = 0; i < lhs_rows; ++i) { - AccumT total = 0; - for (int k = 0; k < accum_depth; ++k) { - AccumT lhs_val = lhs_ptr2[accum_depth * i + k]; - AccumT rhs_val = rhs_ptr2[accum_depth * j + k]; - total += (lhs_val + filter_offset) * (rhs_val + input_offset); - } - int32_t total_scaled = MultiplyByQuantizedMultiplier( - total, output_multiplier, output_shift); - total_scaled += output_offset; - total_scaled = std::max(total_scaled, output_activation_min); - total_scaled = std::min(total_scaled, output_activation_max); - const int idx = lhs_rows * j + i; - out_ptr[idx] = static_cast(total_scaled); - } - } - } - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_ diff --git 
a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h deleted file mode 100644 index cda46a26..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_ - -#include - -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -// TODO(b/135760455): Move this method anonymous namespace in a cc file. 
-inline RuntimeShape ExtendShapeBatchToSpace(const RuntimeShape& shape) { - if (shape.DimensionsCount() == 4) { - return shape; - } - RuntimeShape new_shape(4, 1); - new_shape.SetDim(0, shape.Dims(0)); - new_shape.SetDim(1, shape.Dims(1)); - new_shape.SetDim(3, shape.Dims(2)); - return new_shape; -} - -template -inline void BatchToSpaceND(const RuntimeShape& unextended_input1_shape, - const T* input1_data, - const RuntimeShape& unextended_input2_shape, - const int32_t* block_shape_data, - const RuntimeShape& unextended_input3_shape, - const int32_t* crops_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - ruy::profiler::ScopeLabel label("BatchToSpaceND"); - TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3); - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(), - unextended_output_shape.DimensionsCount()); - - const RuntimeShape input1_shape = - ExtendShapeBatchToSpace(unextended_input1_shape); - const RuntimeShape output_shape = - ExtendShapeBatchToSpace(unextended_output_shape); - - const int output_width = output_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_batch_size = output_shape.Dims(0); - - const int depth = input1_shape.Dims(3); - const int input_width = input1_shape.Dims(2); - const int input_height = input1_shape.Dims(1); - const int input_batch_size = input1_shape.Dims(0); - - const int block_shape_height = block_shape_data[0]; - const int block_shape_width = - unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1; - const int crops_top = crops_data[0]; - const int crops_left = - unextended_input1_shape.DimensionsCount() == 4 ? 
crops_data[2] : 0; - for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) { - const int out_batch = in_batch % output_batch_size; - const int spatial_offset = in_batch / output_batch_size; - for (int in_h = 0; in_h < input_height; ++in_h) { - const int out_h = in_h * block_shape_height + - spatial_offset / block_shape_width - crops_top; - if (out_h < 0 || out_h >= output_height) { - continue; - } - for (int in_w = 0; in_w < input_width; ++in_w) { - const int out_w = in_w * block_shape_width + - spatial_offset % block_shape_width - crops_left; - - if (out_w < 0 || out_w >= output_width) { - continue; - } - T* out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0); - const T* in = - input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0); - memcpy(out, in, depth * sizeof(T)); - } - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/binary_function.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/binary_function.h deleted file mode 100644 index 0b124af8..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/binary_function.h +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_ - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -// Also appears to duplicate MinimumMaximum. -// -// R: Result type. T1: Input 1 type. T2: Input 2 type. -template -inline void BroadcastBinaryFunction4DSlow( - const RuntimeShape& unextended_input1_shape, const T1* input1_data, - const RuntimeShape& unextended_input2_shape, const T2* input2_data, - const RuntimeShape& unextended_output_shape, R* output_data, - R (*func)(T1, T2)) { - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, - unextended_input2_shape, &desc1, &desc2); - - const int* dims_data = - reinterpret_cast(output_shape.DimsDataUpTo5D()); - for (int b = 0; b < output_shape.Dims(0); ++b) { - int out_idx_b = b * dims_data[1]; - int in_idx1_b = desc1.strides[0] * b; - int in_idx2_b = desc2.strides[0] * b; - for (int y = 0; y < output_shape.Dims(1); ++y) { - int out_idx_y = (out_idx_b + y) * dims_data[2]; - int in_idx1_y = in_idx1_b + desc1.strides[1] * y; - int in_idx2_y = in_idx2_b + desc2.strides[1] * y; - for (int x = 0; x < output_shape.Dims(2); ++x) { - int out_idx_x = (out_idx_y + x) * dims_data[3]; - int in1_idx = in_idx1_y + desc1.strides[2] * x; - int in2_idx = in_idx2_y + desc2.strides[2] * x; - for (int c = 0; c < output_shape.Dims(3); ++c) { - auto out_idx 
= out_idx_x + c; - auto in1_val = input1_data[in1_idx]; - auto in2_val = input2_data[in2_idx]; - output_data[out_idx] = func(in1_val, in2_val); - in1_idx += desc1.strides[3]; - in2_idx += desc2.strides[3]; - } - } - } - } -} - -// R: Result type. T1: Input 1 type. T2: Input 2 type. -template -inline void BinaryFunction(const RuntimeShape& input1_shape, - const T1* input1_data, - const RuntimeShape& input2_shape, - const T2* input2_data, - const RuntimeShape& output_shape, R* output_data, - R (*func)(T1, T2)) { - const int flat_size = - MatchingFlatSize(input1_shape, input2_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = func(input1_data[i], input2_data[i]); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_args.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_args.h deleted file mode 100644 index d93c316d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_args.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_ - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - // Gets data at the backward index i of the shape tensor. Returns 1 if the - // index is out of range. - auto get_shape_data = [](const RuntimeShape& shape, const T* data, - int backward_idx) -> T { - int forward_idx = shape.FlatSize() - 1 - backward_idx; - if (forward_idx < 0) return 1; - return data[forward_idx]; - }; - - int output_num_elements = output_shape.FlatSize(); - for (int i = 0; i < output_num_elements; ++i) { - int backward_i = output_num_elements - 1 - i; - int shape1_i = get_shape_data(input1_shape, input1_data, i); - int shape2_i = get_shape_data(input2_shape, input2_data, i); - if (shape1_i == 1) { - output_data[backward_i] = shape2_i; - } else if (shape2_i == 1) { - output_data[backward_i] = shape1_i; - } else { - TFLITE_CHECK_EQ(shape1_i, shape2_i); - output_data[backward_i] = shape1_i; - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_to.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_to.h deleted file mode 100644 index f106b2b5..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/broadcast_to.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/kernel_util.h" - -namespace tflite { -namespace reference_ops { -template -void BroadcastImpl(const NdArrayDesc& input_desc, const char* input_data, - const NdArrayDesc& output_desc, char* output_data, - int indexes[N], int dim, const int last_broadcasting_dim, - const int type_size) { - // Copy data from input to output. - if (dim == last_broadcasting_dim) { - int copy_size = output_desc.strides[dim] * type_size; - const char* data_src = - input_data + SubscriptToIndex(input_desc, indexes) * type_size; - char* data_dst = - output_data + SubscriptToIndex(output_desc, indexes) * type_size; - for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) { - memcpy(data_dst, data_src, copy_size); - } - return; - } - - // Recursive call to find the next broadcasting. - for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim]; - ++indexes[dim]) { - BroadcastImpl(input_desc, input_data, output_desc, output_data, indexes, - dim + 1, last_broadcasting_dim, type_size); - } - - // Duplicate data in output tensor. 
- indexes[dim] = 0; - if (input_desc.extents[dim] != output_desc.extents[dim]) { - int copy_size = output_desc.strides[dim] * type_size; - char* data_src = - output_data + SubscriptToIndex(output_desc, indexes) * type_size; - char* data_dst = data_src + copy_size; - for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) { - memcpy(data_dst, data_src, copy_size); - } - } -} - -template -inline void BroadcastTo(const RuntimeShape& unextended_input_shape, - const char* input_data, - const RuntimeShape& unextended_output_shape, - char* output_data, TfLiteType data_type) { - NdArrayDesc input_desc; - NdArrayDesc output_desc; - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape), - &input_desc); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), - &output_desc); - - // Get the last dimension has broadcasting. At this dimension, the data is - // copied from input tensor to output tensor. - int last_broadcast_dim = -1; - for (int i = N - 1; i >= 0; --i) { - if (input_desc.extents[i] != output_desc.extents[i]) { - last_broadcast_dim = i; - break; - } - } - - // If non-broadcasting, just copy data from input to output tensor. - if (last_broadcast_dim == -1) { - memcpy(output_data, input_data, - unextended_input_shape.FlatSize() * TfLiteTypeGetSize(data_type)); - return; - } - - // Broadcasting using memcpy. 
- int indexes[N] = {0}; - BroadcastImpl(input_desc, input_data, output_desc, output_data, indexes, 0, - last_broadcast_dim, TfLiteTypeGetSize(data_type)); -} -} // namespace reference_ops -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/ceil.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/ceil.h deleted file mode 100644 index 66d1dc35..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/ceil.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_ - -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline void Ceil(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; ++i) { - output_data[i] = std::ceil(input_data[i]); - } -} - -} // namespace reference_ops -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/comparisons.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/comparisons.h deleted file mode 100644 index 6344bdc7..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/comparisons.h +++ /dev/null @@ -1,280 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -template -inline bool EqualFn(T lhs, T rhs) { - return lhs == rhs; -} - -template -inline bool NotEqualFn(T lhs, T rhs) { - return lhs != rhs; -} - -template -inline bool GreaterFn(T lhs, T rhs) { - return lhs > rhs; -} -template -inline bool GreaterEqualFn(T lhs, T rhs) { - return lhs >= rhs; -} -template -inline bool LessFn(T lhs, T rhs) { - return lhs < rhs; -} -template -inline bool LessEqualFn(T lhs, T rhs) { - return lhs <= rhs; -} - -template -using ComparisonFn = bool (*)(T, T); - -template F> -inline void ComparisonImpl( - const ComparisonParams& op_params, const RuntimeShape& input1_shape, - const T* input1_data, const RuntimeShape& input2_shape, - const T* input2_data, const RuntimeShape& output_shape, bool* output_data) { - const int64_t flatsize = - MatchingFlatSize(input1_shape, input2_shape, output_shape); - for (int64_t i = 0; i < flatsize; ++i) { - output_data[i] = F(input1_data[i], input2_data[i]); - } -} - -template F> -inline void Comparison(const ComparisonParams& op_params, - const RuntimeShape& input1_shape, - const float* input1_data, - const RuntimeShape& input2_shape, - const float* input2_data, - const RuntimeShape& output_shape, bool* output_data) { - ComparisonImpl(op_params, input1_shape, input1_data, input2_shape, - input2_data, output_shape, output_data); -} - -template F> -inline void ComparisonWithScaling( - const ComparisonParams& op_params, const RuntimeShape& input1_shape, - const T* input1_data, const RuntimeShape& input2_shape, - const T* input2_data, const RuntimeShape& output_shape, bool* output_data) { - int 
left_shift = op_params.left_shift; - int32_t input1_offset = op_params.input1_offset; - int32_t input1_multiplier = op_params.input1_multiplier; - int input1_shift = op_params.input1_shift; - int32_t input2_offset = op_params.input2_offset; - int32_t input2_multiplier = op_params.input2_multiplier; - int input2_shift = op_params.input2_shift; - - const int64_t flatsize = - MatchingFlatSize(input1_shape, input2_shape, output_shape); - for (int64_t i = 0; i < flatsize; ++i) { - const int32_t input1_val = input1_offset + input1_data[i]; - const int32_t input2_val = input2_offset + input2_data[i]; - const int32_t shifted_input1_val = input1_val * (1 << left_shift); - const int32_t shifted_input2_val = input2_val * (1 << left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, input1_multiplier, input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, input2_multiplier, input2_shift); - output_data[i] = F(scaled_input1_val, scaled_input2_val); - } -} - -struct BroadcastComparison4DSlowCommon { - const RuntimeShape output_shape; - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; -}; - -inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess( - const RuntimeShape& unextended_input1_shape, - const RuntimeShape& unextended_input2_shape, - const RuntimeShape& unextended_output_shape) { - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, - unextended_input2_shape, &desc1, &desc2); - return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1, - desc2}; -} - -template F> -inline void BroadcastComparison4DSlowImpl( - const ComparisonParams& op_params, - const 
RuntimeShape& unextended_input1_shape, const T* input1_data, - const RuntimeShape& unextended_input2_shape, const T* input2_data, - const RuntimeShape& unextended_output_shape, bool* output_data) { - const BroadcastComparison4DSlowCommon dims = - BroadcastComparison4DSlowPreprocess(unextended_input1_shape, - unextended_input2_shape, - unextended_output_shape); - - for (int b = 0; b < dims.output_shape.Dims(0); ++b) { - for (int y = 0; y < dims.output_shape.Dims(1); ++y) { - for (int x = 0; x < dims.output_shape.Dims(2); ++x) { - for (int c = 0; c < dims.output_shape.Dims(3); ++c) { - output_data[Offset(dims.output_shape, b, y, x, c)] = - F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)], - input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]); - } - } - } - } -} - -template F> -inline void BroadcastComparison4DSlow(const ComparisonParams& op_params, - const RuntimeShape& input1_shape, - const float* input1_data, - const RuntimeShape& input2_shape, - const float* input2_data, - const RuntimeShape& output_shape, - bool* output_data) { - BroadcastComparison4DSlowImpl(op_params, input1_shape, input1_data, - input2_shape, input2_data, - output_shape, output_data); -} - -template F> -inline void BroadcastComparison4DSlowWithScaling( - const ComparisonParams& op_params, - const RuntimeShape& unextended_input1_shape, const T* input1_data, - const RuntimeShape& unextended_input2_shape, const T* input2_data, - const RuntimeShape& unextended_output_shape, bool* output_data) { - const BroadcastComparison4DSlowCommon dims = - BroadcastComparison4DSlowPreprocess(unextended_input1_shape, - unextended_input2_shape, - unextended_output_shape); - - int left_shift = op_params.left_shift; - int32_t input1_offset = op_params.input1_offset; - int32_t input1_multiplier = op_params.input1_multiplier; - int input1_shift = op_params.input1_shift; - int32_t input2_offset = op_params.input2_offset; - int32_t input2_multiplier = op_params.input2_multiplier; - int input2_shift = 
op_params.input2_shift; - - for (int b = 0; b < dims.output_shape.Dims(0); ++b) { - for (int y = 0; y < dims.output_shape.Dims(1); ++y) { - for (int x = 0; x < dims.output_shape.Dims(2); ++x) { - for (int c = 0; c < dims.output_shape.Dims(3); ++c) { - const int32_t input1_val = - input1_offset + - input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)]; - const int32_t input2_val = - input2_offset + - input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]; - const int32_t shifted_input1_val = input1_val * (1 << left_shift); - const int32_t shifted_input2_val = input2_val * (1 << left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, input1_multiplier, input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, input2_multiplier, input2_shift); - output_data[Offset(dims.output_shape, b, y, x, c)] = - F(scaled_input1_val, scaled_input2_val); - } - } - } - } -} - -#define TFLITE_COMPARISON_OP(name) \ - inline void name(const ComparisonParams& op_params, \ - const RuntimeShape& input1_shape, const float* input1_data, \ - const RuntimeShape& input2_shape, const float* input2_data, \ - const RuntimeShape& output_shape, bool* output_data) { \ - Comparison(op_params, input1_shape, input1_data, input2_shape, \ - input2_data, output_shape, output_data); \ - } \ - template \ - inline void name##NoScaling( \ - const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ - const T* input1_data, const RuntimeShape& input2_shape, \ - const T* input2_data, const RuntimeShape& output_shape, \ - bool* output_data) { \ - ComparisonImpl(op_params, input1_shape, input1_data, \ - input2_shape, input2_data, output_shape, \ - output_data); \ - } \ - template \ - inline void name##WithScaling( \ - const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ - const T* input1_data, const RuntimeShape& input2_shape, \ - const T* input2_data, const 
RuntimeShape& output_shape, \ - bool* output_data) { \ - ComparisonWithScaling(op_params, input1_shape, input1_data, \ - input2_shape, input2_data, \ - output_shape, output_data); \ - } \ - template \ - inline void Broadcast4DSlow##name##NoScaling( \ - const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ - const T* input1_data, const RuntimeShape& input2_shape, \ - const T* input2_data, const RuntimeShape& output_shape, \ - bool* output_data) { \ - BroadcastComparison4DSlowImpl( \ - op_params, input1_shape, input1_data, input2_shape, input2_data, \ - output_shape, output_data); \ - } \ - inline void Broadcast4DSlow##name( \ - const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ - const float* input1_data, const RuntimeShape& input2_shape, \ - const float* input2_data, const RuntimeShape& output_shape, \ - bool* output_data) { \ - BroadcastComparison4DSlow(op_params, input1_shape, input1_data, \ - input2_shape, input2_data, \ - output_shape, output_data); \ - } \ - template \ - inline void Broadcast4DSlow##name##WithScaling( \ - const ComparisonParams& op_params, const RuntimeShape& input1_shape, \ - const T* input1_data, const RuntimeShape& input2_shape, \ - const T* input2_data, const RuntimeShape& output_shape, \ - bool* output_data) { \ - BroadcastComparison4DSlowWithScaling( \ - op_params, input1_shape, input1_data, input2_shape, input2_data, \ - output_shape, output_data); \ - } -TFLITE_COMPARISON_OP(Equal); -TFLITE_COMPARISON_OP(NotEqual); -TFLITE_COMPARISON_OP(Greater); -TFLITE_COMPARISON_OP(GreaterEqual); -TFLITE_COMPARISON_OP(Less); -TFLITE_COMPARISON_OP(LessEqual); -#undef TFLITE_COMPARISON_OP - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/concatenation.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/concatenation.h deleted file mode 
100644 index 9d2ecbec..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/concatenation.h +++ /dev/null @@ -1,141 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -inline void Concatenation(const ConcatenationParams& params, - const RuntimeShape* const* input_shapes, - const Scalar* const* input_data, - const RuntimeShape& output_shape, - Scalar* output_data) { - int axis = params.axis; - int inputs_count = params.inputs_count; - const int concat_dimensions = output_shape.DimensionsCount(); - TFLITE_DCHECK_LT(axis, concat_dimensions); - - int64_t concat_size = 0; - for (int i = 0; i < inputs_count; i++) { - TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions); - for (int j = 0; j < concat_dimensions; j++) { - if (j != axis) { - MatchingDim(*input_shapes[i], j, output_shape, j); - } - } - concat_size += input_shapes[i]->Dims(axis); - } - 
TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis)); - int64_t outer_size = 1; - for (int i = 0; i < axis; ++i) { - outer_size *= output_shape.Dims(i); - } - // For all input arrays, - // FlatSize() = outer_size * Dims(axis) * base_inner_size; - int64_t base_inner_size = 1; - for (int i = axis + 1; i < concat_dimensions; ++i) { - base_inner_size *= output_shape.Dims(i); - } - - Scalar* output_ptr = output_data; - for (int k = 0; k < outer_size; k++) { - for (int i = 0; i < inputs_count; ++i) { - const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size; - const Scalar* input_ptr = input_data[i] + k * copy_size; - memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar)); - output_ptr += copy_size; - } - } -} - -// TODO(b/174275780): The quantized implementation of concatentation isn't fully -// quantized as it takes scale as a floating point value. This should be fixed -// when optimizng this routine further. -inline void ConcatenationWithScaling(const ConcatenationParams& params, - const RuntimeShape* const* input_shapes, - const uint8_t* const* input_data, - const RuntimeShape& output_shape, - uint8_t* output_data) { - int axis = params.axis; - const int32_t* input_zeropoint = params.input_zeropoint; - const float* input_scale = params.input_scale; - int inputs_count = params.inputs_count; - const int32_t output_zeropoint = params.output_zeropoint; - const float output_scale = params.output_scale; - - const int concat_dimensions = output_shape.DimensionsCount(); - TFLITE_DCHECK_LT(axis, concat_dimensions); - - int64_t concat_size = 0; - for (int i = 0; i < inputs_count; i++) { - TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions); - for (int j = 0; j < concat_dimensions; j++) { - if (j != axis) { - MatchingDim(*input_shapes[i], j, output_shape, j); - } - } - concat_size += input_shapes[i]->Dims(axis); - } - TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis)); - int64_t outer_size = 1; - for (int i = 0; i < axis; ++i) { - 
outer_size *= output_shape.Dims(i); - } - // For all input arrays, - // FlatSize() = outer_size * Dims(axis) * base_inner_size; - int64_t base_inner_size = 1; - for (int i = axis + 1; i < concat_dimensions; ++i) { - base_inner_size *= output_shape.Dims(i); - } - - const float inverse_output_scale = 1.f / output_scale; - uint8_t* output_ptr = output_data; - for (int k = 0; k < outer_size; k++) { - for (int i = 0; i < inputs_count; ++i) { - const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size; - const uint8_t* input_ptr = input_data[i] + k * copy_size; - if (input_zeropoint[i] == output_zeropoint && - input_scale[i] == output_scale) { - memcpy(output_ptr, input_ptr, copy_size); - } else { - const float scale = input_scale[i] * inverse_output_scale; - const float bias = -input_zeropoint[i] * scale; - for (int j = 0; j < copy_size; ++j) { - const int32_t value = static_cast(tflite::TfLiteRound( - input_ptr[j] * scale + bias)) + - output_zeropoint; - output_ptr[j] = static_cast( - std::max(std::min(255, value), 0)); - } - } - output_ptr += copy_size; - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/conv.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/conv.h deleted file mode 100644 index 3a53e06e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/conv.h +++ /dev/null @@ -1,287 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& filter_shape, - const float* filter_data, const RuntimeShape& bias_shape, - const float* bias_data, const RuntimeShape& output_shape, - float* output_data, const RuntimeShape& im2col_shape, - float* im2col_data) { - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const float output_activation_min = params.float_activation_min; - const float output_activation_max = params.float_activation_max; - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - - (void)im2col_data; // only used in optimized code. - (void)im2col_shape; // only used in optimized code. 
- const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = input_shape.Dims(3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int filter_input_depth = filter_shape.Dims(3); - const int groups = input_depth / filter_input_depth; - TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); - const int filters_per_group = output_depth / groups; - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - const int in_y_origin = (out_y * stride_height) - pad_height; - for (int out_x = 0; out_x < output_width; ++out_x) { - const int in_x_origin = (out_x * stride_width) - pad_width; - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - auto group = out_channel / filters_per_group; - float total = 0.f; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - const int in_y = in_y_origin + dilation_height_factor * filter_y; - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - - // Zero padding by omitting the areas outside the image. 
- const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height); - - if (!is_point_inside_image) { - continue; - } - for (int in_channel = 0; in_channel < filter_input_depth; - ++in_channel) { - float input_value = - input_data[Offset(input_shape, batch, in_y, in_x, - in_channel + group * filter_input_depth)]; - float filter_value = filter_data[Offset( - filter_shape, out_channel, filter_y, filter_x, in_channel)]; - total += (input_value * filter_value); - } - } - } - float bias_value = 0.0f; - if (bias_data) { - bias_value = bias_data[out_channel]; - } - output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - ActivationFunctionWithMinMax(total + bias_value, - output_activation_min, - output_activation_max); - } - } - } - } -} - -inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& filter_shape, - const uint8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - uint8_t* output_data, const RuntimeShape& im2col_shape, - uint8_t* im2col_data, void* cpu_backend_context) { - (void)cpu_backend_context; // only used in optimized code. - (void)im2col_data; // only used in optimized code. - (void)im2col_shape; // only used in optimized code. 
- const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int32_t input_offset = params.input_offset; - const int32_t filter_offset = params.weights_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = input_shape.Dims(3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int filter_input_depth = filter_shape.Dims(3); - const int groups = input_depth / filter_input_depth; - TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); - const int filters_per_group = output_depth / groups; - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - const int in_y_origin = (out_y * stride_height) - pad_height; - for (int out_x = 0; out_x < output_width; ++out_x) { - 
const int in_x_origin = (out_x * stride_width) - pad_width; - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - auto group = out_channel / filters_per_group; - int32_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - const int in_y = in_y_origin + dilation_height_factor * filter_y; - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - - // Zero padding by omitting the areas outside the image. - const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height); - - if (!is_point_inside_image) { - continue; - } - - for (int in_channel = 0; in_channel < filter_input_depth; - ++in_channel) { - int32_t input_val = - input_data[Offset(input_shape, batch, in_y, in_x, - in_channel + group * filter_input_depth)]; - int32_t filter_val = filter_data[Offset( - filter_shape, out_channel, filter_y, filter_x, in_channel)]; - acc += - (filter_val + filter_offset) * (input_val + input_offset); - } - } - } - if (bias_data) { - acc += bias_data[out_channel]; - } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, - output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - static_cast(acc); - } - } - } - } -} - -inline void HybridConvPerChannel( - const ConvParams& params, float* scaling_factors_ptr, - const RuntimeShape& input_shape, const int8_t* input_data, - const RuntimeShape& filter_shape, const int8_t* filter_data, - const RuntimeShape& bias_shape, const float* bias_data, - const RuntimeShape& output_shape, float* output_data, - const RuntimeShape& im2col_shape, int8_t* im2col_data, - const float* per_channel_scale, int32_t* input_offset) { - (void)im2col_data; // only used in optimized code. - (void)im2col_shape; // only used in optimized code. 
- const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const float output_activation_min = params.float_activation_min; - const float output_activation_max = params.float_activation_max; - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = input_shape.Dims(3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int filter_input_depth = filter_shape.Dims(3); - const int groups = input_depth / filter_input_depth; - TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); - const int filters_per_group = output_depth / groups; - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - auto group = out_channel / filters_per_group; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - int32_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int in_channel 
= 0; in_channel < filter_input_depth; - ++in_channel) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = - in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. - if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height)) { - int32_t input_val = input_data[Offset( - input_shape, batch, in_y, in_x, - in_channel + group * filter_input_depth)]; - int32_t filter_val = - filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; - acc += filter_val * (input_val - input_offset[batch]); - } - } - } - } - float acc_float = - acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch]; - if (bias_data) { - acc_float += bias_data[out_channel]; - } - output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - ActivationFunctionWithMinMax(acc_float, output_activation_min, - output_activation_max); - } - } - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/cumsum.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/cumsum.h deleted file mode 100644 index 7cbc87c0..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/cumsum.h +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_ - -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" - -namespace tflite { -namespace reference_ops { - -template -inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis, - bool exclusive, bool reverse, T* output_data) { - const int32_t rank = shape.DimensionsCount(); - TFLITE_DCHECK_GE(rank, 1); - TFLITE_DCHECK_GE(axis, 0); - TFLITE_DCHECK_LT(axis, rank); - - size_t inner = 1; - size_t outer = 1; - size_t depth = 1; - for (int32_t i = 0; i < rank; i++) { - if (i < axis) - inner *= shape.Dims(i); - else if (i > axis) - outer *= shape.Dims(i); - else - depth = shape.Dims(i); - } - - for (size_t outer_index = 0; outer_index < outer; outer_index++) { - size_t outer_index_adj; - if (reverse) - outer_index_adj = (outer - 1) - outer_index; - else - outer_index_adj = outer_index; - for (size_t inner_index = 0; inner_index < inner; inner_index++) { - T accumulator = 0; - size_t inner_index_adj; - if (reverse) - inner_index_adj = (inner - 1) - inner_index; - else - inner_index_adj = inner_index; - for (size_t depth_index = 0; depth_index < depth; depth_index++) { - size_t depth_index_adj; - if (reverse) - depth_index_adj = (depth - 1) - depth_index; - else - depth_index_adj = depth_index; - - size_t index = outer_index_adj; - index += inner_index_adj * depth * outer; - index += depth_index_adj * outer; - - if (exclusive) { - output_data[index] = accumulator; - accumulator += input_data[index]; - } else { - accumulator += input_data[index]; - output_data[index] = accumulator; - } - } - } - } -} - -// -// Quantized INT8 CUMSUM -// -inline void CumSum(const 
ArithmeticParams& params, const int8_t* input_data, - const RuntimeShape& shape, int32_t axis, bool exclusive, - bool reverse, int8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - // Input offset is negative input zero point. Activation tensors are - // asymmetric quantized so they span the full int8 range. - // All inputs should have same zero-point and scale, this is checked during - // Prepare stage. - TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits::min()); - TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits::max()); - - const int32_t rank = shape.DimensionsCount(); - TFLITE_DCHECK_GE(rank, 1); - TFLITE_DCHECK_GE(axis, 0); - TFLITE_DCHECK_LT(axis, rank); - - size_t inner = 1; - size_t outer = 1; - size_t depth = 1; - for (int32_t i = 0; i < rank; i++) { - if (i < axis) - inner *= shape.Dims(i); - else if (i > axis) - outer *= shape.Dims(i); - else - depth = shape.Dims(i); - } - - for (size_t outer_index = 0; outer_index < outer; outer_index++) { - size_t outer_index_adj; - if (reverse) - outer_index_adj = (outer - 1) - outer_index; - else - outer_index_adj = outer_index; - for (size_t inner_index = 0; inner_index < inner; inner_index++) { - int32_t accumulator = params.input1_offset; // accumulator = 0 - accumulator *= (1 << params.left_shift); - accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp( - accumulator, params.input1_multiplier, params.input1_shift); - - size_t inner_index_adj; - if (reverse) - inner_index_adj = (inner - 1) - inner_index; - else - inner_index_adj = inner_index; - - for (size_t depth_index = 0; depth_index < depth; depth_index++) { - size_t depth_index_adj; - if (reverse) - depth_index_adj = (depth - 1) - depth_index; - else - depth_index_adj = depth_index; - - size_t index = outer_index_adj; - index += inner_index_adj * depth * outer; - index += depth_index_adj * outer; - - const int32_t y = params.input1_offset + input_data[index]; - const 
int32_t shifted_y = y * (1 << params.left_shift); - const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_y, params.input1_multiplier, params.input1_shift); - - int32_t scaled_output; - if (exclusive) { - scaled_output = accumulator; - accumulator += scaled_y; - } else { - accumulator += scaled_y; - scaled_output = accumulator; - } - - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - scaled_output, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[index] = static_cast(clamped_output); - } - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depth_to_space.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depth_to_space.h deleted file mode 100644 index 23cff285..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depth_to_space.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_ - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - const int input_depth = input_shape.Dims(3); - const int input_width = input_shape.Dims(2); - const int input_height = input_shape.Dims(1); - const int input_batch = input_shape.Dims(0); - - const int output_depth = output_shape.Dims(3); - const int output_width = output_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_batch = output_shape.Dims(0); - - const int32_t block_size = op_params.block_size; - - TFLITE_DCHECK_EQ(input_width * block_size, output_width); - TFLITE_DCHECK_EQ(input_height * block_size, output_height); - TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size); - TFLITE_DCHECK_EQ(input_batch, output_batch); - - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_h = 0; out_h < output_height; ++out_h) { - for (int out_w = 0; out_w < output_width; ++out_w) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - const int in_d = - out_d + ((out_h % block_size) * block_size + out_w % block_size) * - output_depth; - - const int in_w = out_w / block_size; - const int in_h = out_h / block_size; - const int in_b = out_b; - - const int 
input_index = Offset(input_shape, in_b, in_h, in_w, in_d); - const int output_index = - Offset(output_shape, out_b, out_h, out_w, out_d); - - output_data[output_index] = input_data[input_index]; - } - } - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h deleted file mode 100644 index 0cecb16b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -inline void DepthwiseConv( - const DepthwiseParams& params, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& filter_shape, - const float* filter_data, const RuntimeShape& bias_shape, - const float* bias_data, const RuntimeShape& output_shape, - float* output_data) { - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int depth_multiplier = params.depth_multiplier; - const float output_activation_min = params.float_activation_min; - const float output_activation_max = params.float_activation_max; - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int input_depth = input_shape.Dims(3); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); - 
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - - for (int b = 0; b < batches; ++b) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int ic = 0; ic < input_depth; ++ic) { - for (int m = 0; m < depth_multiplier; m++) { - const int oc = m + ic * depth_multiplier; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - float total = 0.f; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = - in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. - if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height)) { - float input_value = - input_data[Offset(input_shape, b, in_y, in_x, ic)]; - float filter_value = filter_data[Offset( - filter_shape, 0, filter_y, filter_x, oc)]; - total += (input_value * filter_value); - } - } - } - float bias_value = 0.0f; - if (bias_data) { - bias_value = bias_data[oc]; - } - output_data[Offset(output_shape, b, out_y, out_x, oc)] = - ActivationFunctionWithMinMax(total + bias_value, - output_activation_min, - output_activation_max); - } - } - } - } - } -} - -} // end namespace reference_ops -} // end namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h deleted file mode 100644 index d4fba139..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +++ /dev/null @@ -1,319 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ - -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -// Used in tests and template parameters to control which version of depthwise -// convolution is called. Primarily for reference code, and specializations -// forced in tests. -enum class DepthwiseConvImplementation { - // Run all tests against kUseStandardEntry even if also testing another - // kernel, since we need to be sure that the main DepthwiseConv() function in - // optimized_ops.h dispatches to a correctly-executing kernel. - kNone = 0, // The "default" option: use the normal - // DepthwiseConv kernel (entry) function. - kUseGenericKernel, // Forced use of generic kernel. - kUseNeon3x3, // 3x3 kernel that uses NEON when available. - kUseNeon3x3DotProduct, // 3x3 kernel that uses dot-product enabled NEON - // when available. - kUseCModel3x3DotProduct, // 3x3 kernel, reference C model that is intended - // to match overall design NEON code. - kUseUnwound3x3DotProduct, // 3x3 kernel, reference C model with unwound loops - // and some arrays. 
- kUseIntrinsics3x3DotProduct, // 3x3 kernel using NEON intrinsics. -}; - -// Category of depthwise convolution output rounding. -enum class DepthwiseConvOutputRounding { - kNone = 0, // Invalid: specific method must be specified. - kAwayFromZero, // Original method: exact halves rounded away from zero. - kUpward, // Halves towards +infinity: adds 0.5 before truncate. - // This is where a future kNearestEven would be placed. -}; - -// Category of depthwise convolution depth multiplication. -enum class DepthwiseConvDepthMultiplication { - kNoMultiplication = 0, // Depth multiplier = 1. - kUnitInputDepth, // Input depth = 1, output depth = depth multiplier. -}; - -namespace reference_ops { -namespace depthwise_conv { - -template -inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier, - int shift) { - TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone); - return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); -} - -// Single-rounding MultiplyByQuantizedMultiplier -#if TFLITE_SINGLE_ROUNDING -template <> -inline int32_t DepthwiseConvRound( - int32_t x, int32_t quantized_multiplier, int shift) { - using gemmlowp::RoundingDivideByPOT; - using gemmlowp::SaturatingRoundingDoublingHighMul; - int left_shift = shift > 0 ? shift : 0; - int right_shift = shift > 0 ? 
0 : -shift; - return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul( - x * (1 << left_shift), quantized_multiplier), - right_shift); -} - -template <> -inline int32_t DepthwiseConvRound( - int32_t x, int32_t quantized_multiplier, int shift) { - return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); -} -// Double-rounding MultiplyByQuantizedMultiplier -#else -template <> -inline int32_t DepthwiseConvRound( - int32_t x, int32_t quantized_multiplier, int shift) { - return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); -} - -template <> -inline int32_t DepthwiseConvRound( - int32_t x, int32_t quantized_multiplier, int shift) { - using gemmlowp::SaturatingRoundingDoublingHighMul; - const int left_shift = shift > 0 ? shift : 0; - const int right_shift = shift > 0 ? 0 : -shift; - const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0; - return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift), - quantized_multiplier) + - rounding_offset) >> - right_shift; -} -#endif // TFLITE_SINGLE_ROUNDING - -template -struct DepthwiseConvBasicKernel { - static inline void Run( - const DepthwiseParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& filter_shape, - const uint8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - uint8_t* output_data) { - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int depth_multiplier = params.depth_multiplier; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - const int32_t input_offset = 
params.input_offset; - const int32_t filter_offset = params.weights_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int input_depth = input_shape.Dims(3); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - - for (int b = 0; b < batches; ++b) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int ic = 0; ic < input_depth; ++ic) { - for (int m = 0; m < depth_multiplier; m++) { - const int oc = m + ic * depth_multiplier; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - int32_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = - in_x_origin + dilation_width_factor * filter_x; - const int in_y = - in_y_origin + dilation_height_factor * filter_y; - // If the location is outside the bounds of the input image, - // use zero as a default value. 
- if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height)) { - int32_t input_val = - input_data[Offset(input_shape, b, in_y, in_x, ic)]; - int32_t filter_val = filter_data[Offset( - filter_shape, 0, filter_y, filter_x, oc)]; - acc += (filter_val + filter_offset) * - (input_val + input_offset); - } - } - } - if (bias_data) { - acc += bias_data[oc]; - } - acc = DepthwiseConvRound(acc, output_multiplier, - output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[Offset(output_shape, b, out_y, out_x, oc)] = - static_cast(acc); - } - } - } - } - } - } - - // TODO(b/148596273): Reconcile reference versions, perhaps with common - // MultiplyByQuantizedMultiplier or DepthwiseConvRound function. - static inline void RunPerChannel( - const DepthwiseParams& params, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { - // Get parameters. - // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro. 
- const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int depth_multiplier = params.depth_multiplier; - const int32_t input_offset = params.input_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - const int32_t* output_multiplier = params.output_multiplier_per_channel; - const int32_t* output_shift = params.output_shift_per_channel; - - // Check dimensions of the tensors. - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int input_depth = input_shape.Dims(3); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - for (int m = 0; m < depth_multiplier; ++m) { - const int output_channel = m + in_channel * 
depth_multiplier; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - int32_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = - in_x_origin + dilation_width_factor * filter_x; - const int in_y = - in_y_origin + dilation_height_factor * filter_y; - // Zero padding by omitting the areas outside the image. - const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height); - if (is_point_inside_image) { - int32_t input_val = input_data[Offset( - input_shape, batch, in_y, in_x, in_channel)]; - int32_t filter_val = filter_data[Offset( - filter_shape, 0, filter_y, filter_x, output_channel)]; - // Accumulate with 32 bits accumulator. - // In the nudging process during model quantization, we - // force real value of 0.0 be represented by a quantized - // value. This guarantees that the input_offset is a int8_t, - // even though it is represented using int32_t. int32_t += - // int8_t - // * (int8_t - int8_t) so the highest value we can get from - // each accumulation is [-127, 127] * ([-128, 127] - - // [-128, 127]), which is [-32512, 32512]. log2(32512) - // = 14.98, which means we can accumulate at least 2^16 - // multiplications without overflow. The accumulator is - // applied to a filter so the accumulation logic will hold - // as long as the filter size (filter_y * filter_x * - // in_channel) does not exceed 2^16, which is the case in - // all the models we have seen so far. 
- acc += filter_val * (input_val + input_offset); - } - } - } - if (bias_data) { - acc += bias_data[output_channel]; - } - acc = DepthwiseConvRound( - acc, output_multiplier[output_channel], - output_shift[output_channel]); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, - output_channel)] = static_cast(acc); - } - } - } - } - } - } -}; - -} // namespace depthwise_conv - -inline void DepthwiseConv( - const DepthwiseParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& filter_shape, - const uint8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - uint8_t* output_data) { - return depthwise_conv::DepthwiseConvBasicKernel< - DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape, - input_data, filter_shape, - filter_data, bias_shape, - bias_data, output_shape, - output_data); -} - -} // namespace reference_ops -} // end namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/dequantize.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/dequantize.h deleted file mode 100644 index b90951f9..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/dequantize.h +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_ - -#include - -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -// Dequantizes into a float without rounding. -template -inline void Dequantize(const tflite::DequantizationParams& op_params, - const RuntimeShape& input_shape, - const InputT* input_data, - const RuntimeShape& output_shape, OutputT* output_data) { - int32_t zero_point = op_params.zero_point; - const double scale = op_params.scale; - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; i++) { - const int32_t val = input_data[i]; - const OutputT result = static_cast(scale * (val - zero_point)); - output_data[i] = result; - } -} - -// Dequantizes per-channel quantized tensor to float. -template -inline void PerChannelDequantize( - const tflite::PerChannelDequantizationParams& op_params, - const RuntimeShape& input_shape, const T* input_data, - const RuntimeShape& output_shape, float* output_data) { - // Ensure flat size is same. 
- MatchingFlatSize(input_shape, output_shape); - - const int32_t* zero_point = op_params.zero_point; - const float* scale = op_params.scale; - const int32_t quantized_dimension = op_params.quantized_dimension; - const int32_t num_dims = input_shape.DimensionsCount(); - const int32_t* dims_data = input_shape.DimsData(); - std::vector current_dim(num_dims, 0); - - do { - size_t offset = - ReducedOutputOffset(num_dims, reinterpret_cast(dims_data), - current_dim.data(), 0, nullptr); - const int channel = current_dim[quantized_dimension]; - const int32_t val = input_data[offset]; - const float result = - static_cast(scale[channel] * (val - zero_point[channel])); - output_data[offset] = result; - } while (NextIndex(num_dims, reinterpret_cast(dims_data), - current_dim.data())); -} - -} // namespace reference_ops - -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/div.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/div.h deleted file mode 100644 index df8da1b1..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/div.h +++ /dev/null @@ -1,247 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { - -namespace reference_ops { - -template -inline void DivCheckArithmeticParams(const ArithmeticParams& params) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - // Input offset is negative input zero point. Activation tensors are - // asymmetric quantized so they span the full int8 range. - constexpr int32_t max_value = - static_cast(std::numeric_limits::max()); - TFLITE_DCHECK_GE(params.input1_offset, -max_value); - TFLITE_DCHECK_LE(params.input1_offset, max_value); - TFLITE_DCHECK_GE(params.input2_offset, -max_value); - TFLITE_DCHECK_LE(params.input2_offset, max_value); - TFLITE_DCHECK_GE(params.output_offset, -max_value); - TFLITE_DCHECK_LE(params.output_offset, max_value); -} - -// Element-wise div that can often be used for inner loop of broadcast Div as -// well as the non-broadcast Div. -template -inline void DivElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { - DivCheckArithmeticParams(params); - - for (int i = 0; i < size; ++i) { - int32_t input1_val = params.input1_offset + input1_data[i]; - int32_t input2_val = params.input2_offset + input2_data[i]; - TFLITE_DCHECK_NE(input2_val, 0); - if (input2_val < 0) { - // Invert signs to avoid a negative input2_val as input2_inv needs to be - // positive to be used as multiplier of MultiplyByQuantizedMultiplier. 
- input1_val = -input1_val; - input2_val = -input2_val; - } - int recip_shift; - const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift); - const int headroom = CountLeadingSignBits(input1_val); - const int32_t unscaled_quotient = - MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv, - headroom); - const int total_shift = params.output_shift - recip_shift - headroom; - const int32_t unclamped_result = - params.output_offset + - MultiplyByQuantizedMultiplierSmallerThanOneExp( - unscaled_quotient, params.output_multiplier, total_shift); - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, unclamped_result)); - output_data[i] = static_cast(clamped_output); - } -} - -inline void Div(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const uint8_t* input1_data, - const RuntimeShape& input2_shape, const uint8_t* input2_data, - const RuntimeShape& output_shape, uint8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - DivElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -inline void Div(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const int8_t* input1_data, - const RuntimeShape& input2_shape, const int8_t* input2_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - DivElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -template -inline void BroadcastDivSlowQuantized( - const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape, - const T* input1_data, const RuntimeShape& unextended_input2_shape, - const T* input2_data, const 
RuntimeShape& unextended_output_shape, - T* output_data) { - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N); - - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, - unextended_input2_shape, &desc1, &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), - &output_desc); - - DivCheckArithmeticParams(params); - - auto div_func = [&](int indexes[N]) { - int32_t input1_val = - params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)]; - int32_t input2_val = - params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)]; - TFLITE_DCHECK_NE(input2_val, 0); - if (input2_val < 0) { - // Invert signs to avoid a negative input2_val as input2_inv needs to be - // positive to be used as multiplier of MultiplyByQuantizedMultiplier. 
- input1_val = -input1_val; - input2_val = -input2_val; - } - int recip_shift; - const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift); - const int headroom = CountLeadingSignBits(input1_val); - const int32_t unscaled_quotient = - MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv, - headroom); - const int total_shift = params.output_shift - recip_shift - headroom; - const int32_t unclamped_result = - params.output_offset + - MultiplyByQuantizedMultiplierSmallerThanOneExp( - unscaled_quotient, params.output_multiplier, total_shift); - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, unclamped_result)); - output_data[SubscriptToIndex(output_desc, indexes)] = - static_cast(clamped_output); - }; - NDOpsHelper(output_desc, div_func); -} - -template -inline void BroadcastDivSlow(const ArithmeticParams& params, - const RuntimeShape& unextended_input1_shape, - const uint8_t* input1_data, - const RuntimeShape& unextended_input2_shape, - const uint8_t* input2_data, - const RuntimeShape& unextended_output_shape, - uint8_t* output_data) { - BroadcastDivSlowQuantized( - params, unextended_input1_shape, input1_data, unextended_input2_shape, - input2_data, unextended_output_shape, output_data); -} - -template -inline void BroadcastDivSlow(const ArithmeticParams& params, - const RuntimeShape& unextended_input1_shape, - const int8_t* input1_data, - const RuntimeShape& unextended_input2_shape, - const int8_t* input2_data, - const RuntimeShape& unextended_output_shape, - int8_t* output_data) { - BroadcastDivSlowQuantized( - params, unextended_input1_shape, input1_data, unextended_input2_shape, - input2_data, unextended_output_shape, output_data); -} - -// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary -// dimensionality if the runtime code does a single loop over one dimension -// that handles broadcasting as the base case. 
The code generator would then -// generate max(D1, D2) nested for loops. -template -void BroadcastDivSlow(const ArithmeticParams& params, - const RuntimeShape& unextended_input1_shape, - const T* input1_data, - const RuntimeShape& unextended_input2_shape, - const T* input2_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - T output_activation_min; - T output_activation_max; - GetActivationParams(params, &output_activation_min, &output_activation_max); - - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N); - - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, - unextended_input2_shape, &desc1, &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), - &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest - // stride, typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. 
- - auto div_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] / - input2_data[SubscriptToIndex(desc2, indexes)], - output_activation_min, output_activation_max); - }; - NDOpsHelper(output_desc, div_func); -} - -template -inline void Div(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - T output_activation_min; - T output_activation_max; - GetActivationParams(params, &output_activation_min, &output_activation_max); - - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = ActivationFunctionWithMinMax( - input1_data[i] / input2_data[i], output_activation_min, - output_activation_max); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/elu.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/elu.h deleted file mode 100644 index 3dc93589..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/elu.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_ - -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline void Elu(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - const float val = input_data[i]; - output_data[i] = val < 0.0f ? TfLiteExpm1(val) : val; - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ELU_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/exp.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/exp.h deleted file mode 100644 index 134ee13f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/exp.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_ - -#include - -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -inline void Exp(const T* input_data, const size_t num_elements, - T* output_data) { - ruy::profiler::ScopeLabel label("Exp"); - for (size_t idx = 0; idx < num_elements; ++idx) { - output_data[idx] = std::exp(input_data[idx]); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_EXP_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fill.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fill.h deleted file mode 100644 index 16630e61..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fill.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_ - -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -void Fill(const RuntimeShape& value_shape, const T* value_data, - const RuntimeShape& output_shape, T* output_data) { - TFLITE_DCHECK_EQ(value_shape.DimensionsCount(), 0); - const int flat_size = output_shape.FlatSize(); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = *value_data; - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FILL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor.h deleted file mode 100644 index 0693fd42..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_ - -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline void Floor(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; i++) { - int offset = i; - output_data[offset] = std::floor(input_data[offset]); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_div.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_div.h deleted file mode 100644 index e75d473c..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_div.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -T FloorDiv(T input1, T input2) { - return std::floor(std::divides()(static_cast(input1), - static_cast(input2))); -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_mod.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_mod.h deleted file mode 100644 index 20ce18b7..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/floor_mod.h +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_ - -#include -#include - -namespace tflite { - -namespace reference_ops { - -template -T FloorMod(T input1, T input2) { - struct FloatMod { - float operator()(const float lhs, const float rhs) const { - return std::fmod(lhs, rhs); - } - }; - using ModFunc = typename std::conditional::value, - std::modulus, FloatMod>::type; - ModFunc mod_func; - T trunc_mod = mod_func(input1, input2); - return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0)) - ? (trunc_mod + input2) - : trunc_mod; -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fully_connected.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fully_connected.h deleted file mode 100644 index ba51cbcf..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/fully_connected.h +++ /dev/null @@ -1,323 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_ - -#include - -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& weights_shape, - const float* weights_data, const RuntimeShape& bias_shape, - const float* bias_data, const RuntimeShape& output_shape, - float* output_data) { - const float output_activation_min = params.float_activation_min; - const float output_activation_max = params.float_activation_max; - // TODO(b/62193649): This really should be: - // const int batches = ArraySize(output_dims, 1); - // but the current --variable_batch hack consists in overwriting the 3rd - // dimension with the runtime batch size, as we don't keep track for each - // array of which dimension is the batch dimension in it. 
- const int output_dims_count = output_shape.DimensionsCount(); - const int weights_dims_count = weights_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1); - const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2, - output_shape, output_dims_count - 1); - const int accum_depth = weights_shape.Dims(weights_dims_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - float total = 0.f; - for (int d = 0; d < accum_depth; ++d) { - total += input_data[b * accum_depth + d] * - weights_data[out_c * accum_depth + d]; - } - float bias_value = 0.0f; - if (bias_data) { - bias_value = bias_data[out_c]; - } - output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax( - total + bias_value, output_activation_min, output_activation_max); - } - } -} - -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& filter_shape, - const uint8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - uint8_t* output_data) { - const int32_t input_offset = params.input_offset; - const int32_t filter_offset = params.weights_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - // TODO(b/62193649): This really should be: - // const int batches = ArraySize(output_dims, 1); - // but the current --variable_batch hack consists in overwriting the 3rd - // dimension with the runtime batch 
size, as we don't keep track for each - // array of which dimension is the batch dimension in it. - const int output_dim_count = output_shape.DimensionsCount(); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2, - output_shape, output_dim_count - 1); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += (filter_val + filter_offset) * (input_val + input_offset); - } - if (bias_data) { - acc += bias_data[out_c]; - } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& filter_shape, - const uint8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t input_offset = params.input_offset; - const int32_t filter_offset = params.weights_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - TFLITE_DCHECK_EQ(output_offset, 0); - // TODO(b/62193649): 
This really should be: - // const int batches = ArraySize(output_dims, 1); - // but the current --variable_batch hack consists in overwriting the 3rd - // dimension with the runtime batch size, as we don't keep track for each - // array of which dimension is the batch dimension in it. - const int output_dim_count = output_shape.DimensionsCount(); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2, - output_shape, output_dim_count - 1); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - // Internal accumulation. - // Initialize accumulator with the bias-value. - int32_t accum = bias_data[out_c]; - // Accumulation loop. - for (int d = 0; d < accum_depth; ++d) { - int16_t input_val = input_data[b * accum_depth + d] + input_offset; - int16_t filter_val = - filter_data[out_c * accum_depth + d] + filter_offset; - accum += filter_val * input_val; - } - // Down-scale the final int32_t accumulator to the scale used by our - // (16-bit, typically 3 integer bits) fixed-point format. The quantized - // multiplier and shift here have been pre-computed offline - // (e.g. by toco). - accum = - MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift); - // Saturate, cast to int16_t, and store to output array. 
- accum = std::max(accum, output_activation_min - output_offset); - accum = std::min(accum, output_activation_max - output_offset); - accum += output_offset; - output_data[out_c + output_depth * b] = accum; - } - } -} - -inline void ShuffledFullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& weights_shape, - const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int16_t* output_data, uint8_t* shuffled_input_workspace_data) { - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - - TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1); - TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - // TODO(b/62193649): This really should be: - // const int batches = ArraySize(output_dims, 1); - // but the current --variable_batch hack consists in overwriting the 3rd - // dimension with the runtime batch size, as we don't keep track for each - // array of which dimension is the batch dimension in it. 
- const int output_dim_count = output_shape.DimensionsCount(); - const int weights_dim_count = weights_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2, - output_shape, output_dim_count - 1); - const int accum_depth = weights_shape.Dims(weights_dim_count - 1); - TFLITE_DCHECK((accum_depth % 16) == 0); - TFLITE_DCHECK((output_depth % 4) == 0); - - // Shuffling and xoring of input activations into the workspace buffer - uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data; - if (batches == 1) { - for (int i = 0; i < accum_depth; i++) { - shuffled_input_workspace_data[i] = input_data[i] ^ 0x80; - } - } else if (batches == 4) { - for (int c = 0; c < accum_depth; c += 16) { - for (int b = 0; b < 4; b++) { - const uint8_t* src_data_ptr = input_data + b * accum_depth + c; - for (int j = 0; j < 16; j++) { - uint8_t src_val = *src_data_ptr++; - // Flip the sign bit, so that the kernel will only need to - // reinterpret these uint8_t values as int8_t, getting for free the - // subtraction of the zero_point value 128. - uint8_t dst_val = src_val ^ 0x80; - *shuffled_input_workspace_ptr++ = dst_val; - } - } - } - } else { - TFLITE_DCHECK(false); - return; - } - - // Actual computation - if (batches == 1) { - int16_t* output_ptr = output_data; - // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) - // so that just reinterpreting them as int8_t values is equivalent to - // subtracting 128 from them, thus implementing for free the subtraction of - // the zero_point value 128. - const int8_t* shuffled_weights_ptr = - reinterpret_cast(shuffled_weights_data); - // Likewise, we preshuffled and pre-xored the input data above. - const int8_t* shuffled_input_data = - reinterpret_cast(shuffled_input_workspace_data); - for (int c = 0; c < output_depth; c += 4) { - // Internal accumulation. 
- // Initialize accumulator with the bias-value. - int32_t accum[4] = {0}; - // Accumulation loop. - for (int d = 0; d < accum_depth; d += 16) { - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 16; j++) { - int8_t input_val = shuffled_input_data[d + j]; - int8_t weights_val = *shuffled_weights_ptr++; - accum[i] += weights_val * input_val; - } - } - } - for (int i = 0; i < 4; i++) { - // Add bias value - int32_t acc = accum[i] + bias_data[c + i]; - // Down-scale the final int32_t accumulator to the scale used by our - // (16-bit, typically 3 integer bits) fixed-point format. The quantized - // multiplier and shift here have been pre-computed offline - // (e.g. by toco). - acc = - MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - // Saturate, cast to int16_t, and store to output array. - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_ptr[c + i] = acc; - } - } - } else if (batches == 4) { - int16_t* output_ptr = output_data; - // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) - // so that just reinterpreting them as int8_t values is equivalent to - // subtracting 128 from them, thus implementing for free the subtraction of - // the zero_point value 128. - const int8_t* shuffled_weights_ptr = - reinterpret_cast(shuffled_weights_data); - // Likewise, we preshuffled and pre-xored the input data above. - const int8_t* shuffled_input_data = - reinterpret_cast(shuffled_input_workspace_data); - for (int c = 0; c < output_depth; c += 4) { - const int8_t* shuffled_input_ptr = shuffled_input_data; - // Accumulation loop. - // Internal accumulation. - // Initialize accumulator with the bias-value. 
- int32_t accum[4][4]; - for (int i = 0; i < 4; i++) { - for (int b = 0; b < 4; b++) { - accum[i][b] = 0; - } - } - for (int d = 0; d < accum_depth; d += 16) { - for (int i = 0; i < 4; i++) { - for (int b = 0; b < 4; b++) { - for (int j = 0; j < 16; j++) { - int8_t input_val = shuffled_input_ptr[16 * b + j]; - int8_t weights_val = shuffled_weights_ptr[16 * i + j]; - accum[i][b] += weights_val * input_val; - } - } - } - shuffled_input_ptr += 64; - shuffled_weights_ptr += 64; - } - for (int i = 0; i < 4; i++) { - for (int b = 0; b < 4; b++) { - // Add bias value - int32_t acc = accum[i][b] + bias_data[c + i]; - // Down-scale the final int32_t accumulator to the scale used by our - // (16-bit, typically 3 integer bits) fixed-point format. The - // quantized multiplier and shift here have been pre-computed offline - // (e.g. by toco). - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, - output_shift); - // Saturate, cast to int16_t, and store to output array. - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_ptr[b * output_depth + c + i] = acc; - } - } - } - } else { - TFLITE_DCHECK(false); - return; - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h deleted file mode 100644 index 8d9b318c..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_integer_ops { - -inline void CheckArithmeticParams(const ArithmeticParams& params) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - // Input offset is negative input zero point. Activation tensors are - // asymmetric quantized so they span the full int8 range. 
- TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits::min()); - TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits::min()); - TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits::max()); - TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits::max()); -} - -inline void ElementWise( - int size, const ArithmeticParams& params, const int8_t* input1_data, - const int8_t* input2_data, int8_t* output_data, - void (*check_arithmetic_params)(const ArithmeticParams&), - int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) { - CheckArithmeticParams(params); - for (int i = 0; i < size; ++i) { - output_data[i] = binary_func(input1_data[i], input2_data[i], params); - } -} - -inline void BroadcastBinaryFunction4DSlow( - const ArithmeticParams& params, const RuntimeShape& input1_shape, - const int8_t* input1_data, const RuntimeShape& input2_shape, - const int8_t* input2_data, const RuntimeShape& output_shape, - int8_t* output_data, - void (*check_arithmetic_params)(const ArithmeticParams&), - int8_t (*binary_func)(int8_t, int8_t, const ArithmeticParams&)) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - output_data[Offset(extended_output_shape, b, y, x, c)] = binary_func( - input1_data[SubscriptToIndex(desc1, b, y, x, c)], - input2_data[SubscriptToIndex(desc2, b, y, x, c)], params); - } - } - } - } -} - -inline int8_t AddFunc(int8_t x, int8_t y, const ArithmeticParams& params) { - const int32_t input1_val = params.input1_offset + x; - const int32_t input2_val = params.input2_offset + y; - const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); - const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32_t raw_sum = scaled_input1_val + scaled_input2_val; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sum, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - return static_cast(clamped_output); -} - -// Element-wise add that can often be used for inner loop of broadcast add as -// well as the non-broadcast add. 
-inline void AddElementwise(int size, const ArithmeticParams& params, - const int8_t* input1_data, const int8_t* input2_data, - int8_t* output_data) { - ElementWise(size, params, input1_data, input2_data, output_data, - CheckArithmeticParams, AddFunc); -} - -inline void Add(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const int8_t* input1_data, - const RuntimeShape& input2_shape, const int8_t* input2_data, - const RuntimeShape& output_shape, int8_t* output_data) { - CheckArithmeticParams(params); - - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - AddElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -inline void BroadcastAdd4DSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int8_t* input1_data, - const RuntimeShape& input2_shape, - const int8_t* input2_data, - const RuntimeShape& output_shape, - int8_t* output_data) { - BroadcastBinaryFunction4DSlow(params, input1_shape, input1_data, input2_shape, - input2_data, output_shape, output_data, - CheckArithmeticParams, AddFunc); -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h deleted file mode 100644 index ffd4978e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +++ /dev/null @@ -1,238 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -// Fixed-point per-channel-quantization convolution reference kernel. -inline void ConvPerChannel( - const ConvParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { - // Get parameters. - const int32_t input_offset = params.input_offset; // r = s(q - Z) - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int32_t output_offset = params.output_offset; - - // Set min and max value of the output. - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - - // Consistency check. 
- TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = input_shape.Dims(3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - - // Check dimensions of the tensors. - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int filter_input_depth = filter_shape.Dims(3); - const int groups = input_depth / filter_input_depth; - TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); - const int filters_per_group = output_depth / groups; - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - const int in_y_origin = (out_y * stride_height) - pad_height; - for (int out_x = 0; out_x < output_width; ++out_x) { - const int in_x_origin = (out_x * stride_width) - pad_width; - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - auto group = out_channel / filters_per_group; - int32_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - const int in_y = in_y_origin + dilation_height_factor * filter_y; - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - - // Zero padding by omitting the areas outside the image. 
- const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height); - - if (!is_point_inside_image) { - continue; - } - - for (int in_channel = 0; in_channel < filter_input_depth; - ++in_channel) { - int32_t input_val = - input_data[Offset(input_shape, batch, in_y, in_x, - in_channel + group * filter_input_depth)]; - int32_t filter_val = filter_data[Offset( - filter_shape, out_channel, filter_y, filter_x, in_channel)]; - // Accumulate with 32 bits accumulator. - // In the nudging process during model quantization, we force - // real value of 0.0 be represented by a quantized value. This - // guarantees that the input_offset is a int8_t, even though - // it is represented using int32_t. int32_t += int8_t * - // (int8_t - int8_t) so the highest value we can get from each - // accumulation is [-127, 127] * ([-128, 127] - - // [-128, 127]), which is [-32512, 32512]. log2(32512) - // = 14.98, which means we can accumulate at least 2^16 - // multiplications without overflow. The accumulator is - // applied to a filter so the accumulation logic will hold as - // long as the filter size (filter_y * filter_x * in_channel) - // does not exceed 2^16, which is the case in all the models - // we have seen so far. - // TODO(b/174275578): Add a check to make sure the - // accumulator depth is smaller than 2^16. - acc += filter_val * (input_val + input_offset); - } - } - } - - if (bias_data) { - acc += bias_data[out_channel]; - } - acc = MultiplyByQuantizedMultiplier( - acc, output_multiplier[out_channel], output_shift[out_channel]); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - static_cast(acc); - } - } - } - } -} - -// Fixed-point per-channel-quantization convolution reference kernel. 
-// 16-bit data and 8-bit filter -template -inline void ConvPerChannel( - const ConvParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - // Get parameters. - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - - // Set min and max value of the output. - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - - // Consistency check. - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = input_shape.Dims(3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - - // Check dimensions of the tensors. 
- const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int filter_input_depth = filter_shape.Dims(3); - const int groups = input_depth / filter_input_depth; - TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); - const int filters_per_group = output_depth / groups; - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - const int in_y_origin = (out_y * stride_height) - pad_height; - for (int out_x = 0; out_x < output_width; ++out_x) { - const int in_x_origin = (out_x * stride_width) - pad_width; - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - auto group = out_channel / filters_per_group; - AccumScalar acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - const int in_y = in_y_origin + dilation_height_factor * filter_y; - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - - // Zero padding by omitting the areas outside the image. - const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height); - - if (!is_point_inside_image) { - continue; - } - - for (int in_channel = 0; in_channel < filter_input_depth; - ++in_channel) { - int32_t input_val = - input_data[Offset(input_shape, batch, in_y, in_x, - in_channel + group * filter_input_depth)]; - int32_t filter_val = filter_data[Offset( - filter_shape, out_channel, filter_y, filter_x, in_channel)]; - // Accumulate with 64 bits accumulator. - // int64_t += int8_t * int16_t so the highest value we can - // get from each accumulation is [-127, 127] * ([-32768, - // 32767] - - // [-32768, 32767]), which is [-8322945, 8322945]. 
- // log2(8322945) = 22.99. - acc += filter_val * input_val; - } - } - } - if (bias_data) { - acc += bias_data[out_channel]; - } - int32_t scaled_acc = MultiplyByQuantizedMultiplier( - acc, output_multiplier[out_channel], output_shift[out_channel]); - scaled_acc = std::max(scaled_acc, output_activation_min); - scaled_acc = std::min(scaled_acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - static_cast(scaled_acc); - } - } - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h deleted file mode 100644 index 7676fce0..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +++ /dev/null @@ -1,291 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { -inline void DepthwiseConvPerChannel( - const DepthwiseParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { - // Get parameters. - // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro. - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int depth_multiplier = params.depth_multiplier; - const int32_t input_offset = params.input_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - - // Check dimensions of the tensors. 
- TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int input_depth = input_shape.Dims(3); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - for (int m = 0; m < depth_multiplier; ++m) { - const int output_channel = m + in_channel * depth_multiplier; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - int32_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = - in_y_origin + dilation_height_factor * filter_y; - // Zero padding by omitting the areas outside the image. 
- const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height); - if (is_point_inside_image) { - int32_t input_val = input_data[Offset( - input_shape, batch, in_y, in_x, in_channel)]; - int32_t filter_val = filter_data[Offset( - filter_shape, 0, filter_y, filter_x, output_channel)]; - // Accumulate with 32 bits accumulator. - // In the nudging process during model quantization, we force - // real value of 0.0 be represented by a quantized value. This - // guarantees that the input_offset is a int8_t, even though - // it is represented using int32_t. int32_t += int8_t * - // (int8_t - int8_t) so the highest value we can get from each - // accumulation is [-127, 127] * ([-128, 127] - - // [-128, 127]), which is [-32512, 32512]. log2(32512) - // = 14.98, which means we can accumulate at least 2^16 - // multiplications without overflow. The accumulator is - // applied to a filter so the accumulation logic will hold as - // long as the filter size (filter_y * filter_x * in_channel) - // does not exceed 2^16, which is the case in all the models - // we have seen so far. - // TODO(b/174275578): Add a check to make sure the - // accumulator depth is smaller than 2^16. 
- acc += filter_val * (input_val + input_offset); - } - } - } - if (bias_data) { - acc += bias_data[output_channel]; - } - acc = MultiplyByQuantizedMultiplier( - acc, output_multiplier[output_channel], - output_shift[output_channel]); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, - output_channel)] = static_cast(acc); - } - } - } - } - } -} - -inline void DepthwiseConvPerChannel( - const DepthwiseParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const std::int64_t* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - // Get parameters. - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int depth_multiplier = params.depth_multiplier; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - - // Check dimensions of the tensors. 
- TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int input_depth = input_shape.Dims(3); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - for (int m = 0; m < depth_multiplier; ++m) { - const int output_channel = m + in_channel * depth_multiplier; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - std::int64_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = - in_y_origin + dilation_height_factor * filter_y; - // Zero padding by omitting the areas outside the image. 
- const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height); - if (is_point_inside_image) { - int32_t input_val = input_data[Offset( - input_shape, batch, in_y, in_x, in_channel)]; - int32_t filter_val = filter_data[Offset( - filter_shape, 0, filter_y, filter_x, output_channel)]; - // Accumulate with 64 bits accumulator. - // We assume maximum of 2^16 accumulations as with the 8-bit - // case so actually the value in the accumulator should not - // exceed 40 bits - acc += static_cast(filter_val) * - static_cast(input_val); - } - } - } - if (bias_data) { - acc += bias_data[output_channel]; - } - int32_t scaled_acc = MultiplyByQuantizedMultiplier( - acc, output_multiplier[output_channel], - output_shift[output_channel]); - scaled_acc = std::max(scaled_acc, output_activation_min); - scaled_acc = std::min(scaled_acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, - output_channel)] = - static_cast(scaled_acc); - } - } - } - } - } -} - -inline void DepthwiseConvHybridPerChannel( - const DepthwiseParams& params, float* scaling_factors_ptr, - const RuntimeShape& input_shape, const int8_t* input_data, - const RuntimeShape& filter_shape, const int8_t* filter_data, - const RuntimeShape& bias_shape, const float* bias_data, - const RuntimeShape& output_shape, float* output_data, - const float* per_channel_scale, int32_t* input_offset) { - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int dilation_width_factor = params.dilation_width_factor; - const int dilation_height_factor = params.dilation_height_factor; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - const int depth_multiplier = params.depth_multiplier; - const float output_activation_min = params.float_activation_min; - const float output_activation_max = params.float_activation_max; - // Check dimensions of the 
tensors. - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int input_depth = input_shape.Dims(3); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int bias_depth = bias_shape.FlatSize(); - TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier); - TFLITE_DCHECK_EQ(bias_depth, output_depth); - - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - for (int m = 0; m < depth_multiplier; ++m) { - const int output_channel = m + in_channel * depth_multiplier; - const int in_x_origin = (out_x * stride_width) - pad_width; - const int in_y_origin = (out_y * stride_height) - pad_height; - int32_t acc = 0; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - const int in_x = in_x_origin + dilation_width_factor * filter_x; - const int in_y = - in_y_origin + dilation_height_factor * filter_y; - // Zero padding by omitting the areas outside the image. 
- const bool is_point_inside_image = - (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && - (in_y < input_height); - if (is_point_inside_image) { - int32_t input_val = input_data[Offset( - input_shape, batch, in_y, in_x, in_channel)]; - int32_t filter_val = filter_data[Offset( - filter_shape, 0, filter_y, filter_x, output_channel)]; - acc += filter_val * (input_val - input_offset[batch]); - } - } - } - float acc_float = static_cast(acc); - acc_float *= - per_channel_scale[output_channel] * scaling_factors_ptr[batch]; - if (bias_data && output_channel < bias_depth) { - acc_float += bias_data[output_channel]; - } - output_data[Offset(output_shape, batch, out_y, out_x, - output_channel)] = - ActivationFunctionWithMinMax(acc_float, output_activation_min, - output_activation_max); - } - } - } - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h deleted file mode 100644 index 634f0bff..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +++ /dev/null @@ -1,201 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -// For per-channel functions, since it is defined in quantization spec that -// weights are symmetric -// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric), -// zero_point (params.weights_offset) is always 0. -// However, for per-tensor functions, params.weights_offset is still applied for -// backward compatibility. - -inline void FullyConnectedPerChannel( - const FullyConnectedParams& params, const int32_t* output_multiplier, - const int* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { - const int32_t input_offset = params.input_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int batches = output_shape.Dims(0); - const int output_depth = output_shape.Dims(1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; - for (int d = 0; d < accum_depth; ++d) { - 
int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += filter_val * (input_val + input_offset); - } - if (bias_data) { - acc += bias_data[out_c]; - } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c], - output_shift[out_c]); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -template -inline void FullyConnectedPerChannel( - const FullyConnectedParams& params, const int32_t* output_multiplier, - const int* output_shift, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += filter_val * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } - 
int32_t acc_scaled = MultiplyByQuantizedMultiplier( - acc, output_multiplier[out_c], output_shift[out_c]); - acc_scaled = std::max(acc_scaled, output_activation_min); - acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); - } - } -} - -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data) { - const int32_t input_offset = params.input_offset; - const int32_t filter_offset = params.weights_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - int32_t acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += (filter_val + filter_offset) * (input_val + input_offset); - } - if (bias_data) { - acc += 
bias_data[out_c]; - } - acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); - } - } -} - -template -inline void FullyConnected( - const FullyConnectedParams& params, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const AccumScalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int32_t filter_offset = params.weights_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); - TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); - - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int output_dim_count = output_shape.DimensionsCount(); - const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); - const int output_depth = output_shape.Dims(output_dim_count - 1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - for (int b = 0; b < batches; ++b) { - for (int out_c = 0; out_c < output_depth; ++out_c) { - AccumScalar acc = 0; - for (int d = 0; d < accum_depth; ++d) { - int32_t input_val = input_data[b * accum_depth + d]; - int32_t filter_val = filter_data[out_c * accum_depth + d]; - acc += (filter_val + filter_offset) * input_val; - } - if (bias_data) { - acc += bias_data[out_c]; - } - int32_t acc_scaled = - MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - acc_scaled 
= std::max(acc_scaled, output_activation_min); - acc_scaled = std::min(acc_scaled, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc_scaled); - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h deleted file mode 100644 index 164a8367..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -inline void L2Normalization(int32_t input_zero_point, int32_t outer_size, - int32_t depth, const int8_t* input_data, - int8_t* output_data) { - static constexpr int8_t kMinInt8 = std::numeric_limits::min(); - static constexpr int8_t kMaxInt8 = std::numeric_limits::max(); - // The output scale must be in sync with Prepare(). - // Output is in 1/128 scale so the actual output range is nudged from [-1, 1] - // to [-1, 127/128]. - static constexpr int32_t kOutputScale = 7; - for (int outer_index = 0; outer_index < outer_size; ++outer_index) { - // int32_t = (int8_t - int8_t) ^ 2. - // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is - // safe from overflowing in at least 2^16 steps. - int32_t acc = 0; - for (int inner_index = 0; inner_index < depth; ++inner_index) { - int32_t input = - input_data[depth * outer_index + inner_index] - input_zero_point; - acc += input * input; - } - int32_t inv_l2norm_multiplier; - int inv_l2norm_shift; - GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier, - &inv_l2norm_shift); - - for (int inner_index = 0; inner_index < depth; ++inner_index) { - int32_t input = - input_data[depth * outer_index + inner_index] - input_zero_point; - - // Rescale and downcast. Rescale is folded into the division. 
- int32_t output_in_q24 = MultiplyByQuantizedMultiplier( - input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale); - output_in_q24 = - std::min(static_cast(kMaxInt8), - std::max(static_cast(kMinInt8), output_in_q24)); - output_data[depth * outer_index + inner_index] = - static_cast(output_in_q24); - } - } -} -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h deleted file mode 100644 index 16eff133..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -inline void Logistic(int32_t input_zero_point, int32_t input_range_radius, - int32_t input_multiplier, int32_t input_left_shift, - int32_t input_size, const int8_t* input_data, - int8_t* output_data) { - // Integer bits must be in sync with Prepare() function. - static constexpr int32_t kInputIntegerBits = 4; - static constexpr int32_t kOutputIntegerBits = 8; - static constexpr int8_t kMinInt8 = std::numeric_limits::min(); - static constexpr int8_t kMaxInt8 = std::numeric_limits::max(); - static constexpr int32_t kOutputZeroPoint = -128; - - for (int i = 0; i < input_size; ++i) { - const int32_t input = - static_cast(input_data[i]) - input_zero_point; - if (input <= -input_range_radius) { - output_data[i] = kMinInt8; - } else if (input >= input_range_radius) { - output_data[i] = kMaxInt8; - } else { - const int32_t input_in_q4 = MultiplyByQuantizedMultiplier( - input, input_multiplier, input_left_shift); - using FixedPoint4 = gemmlowp::FixedPoint; - const int32_t output_in_q0 = - gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw(); - - // Rescale and downcast. 
- using gemmlowp::RoundingDivideByPOT; - int32_t output_in_q23 = - RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits); - output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint, - static_cast(kMinInt8)), - static_cast(kMaxInt8)); - output_data[i] = static_cast(output_in_q23); - } - } -} - -inline void Logistic(int32_t input_multiplier, int32_t input_left_shift, - int32_t input_size, const int16_t* ptr_input_data, - int16_t* ptr_output_data) { - // We use the LUT for sigmoid and take into account, that - // tanh(x) = 2*sigmoid(2*x) - 1 - - // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7]. - // In case of general parameter scale, multiplier 3 is taken into account - // in TanhPrepare function and it is included in - // input_multiplier already. - - TFLITE_DCHECK_GE(input_left_shift, 0); - if (input_multiplier == 0) { // power of two case - input_multiplier = 3 << input_left_shift; - input_left_shift = 0; - } - - int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0; - - for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) { - int32_t input_data = - ((*ptr_input_data) * input_multiplier + round) >> input_left_shift; - - // We do interpolation on unsigned values. - uint32_t abs_input_data = abs(input_data); - - // We divide by 2 power of 9, because - // we need to divide by 2 in power of 7 for - // the input conversion + 1/4 from the scale above. - - // Define uh as uint32_t type not to make this function overflow. - uint32_t uh = abs_input_data >> 9; - uint32_t result; - - if (uh >= 255) { - // Saturate to maximum. - result = 0x7FFF << 10; - } else { - uint32_t ua = sigmoid_table_uint16[uh]; - uint32_t ub = sigmoid_table_uint16[uh + 1]; - uint32_t ut = abs_input_data & 0x1ff; - // Interpolation is done using the fractional bit. - result = (ua << 9) + ut * (ub - ua); - } - - result = (input_data >= 0) ? (result + (1 << 9)) - : ((1 << (16 + 9)) - result + (1 << 9) - 1); - - // Back to 16-bit. 
- result >>= 10; - - *ptr_output_data = result; - } -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h deleted file mode 100644 index 09d37b72..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -template -inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier, - int32_t shift, const RuntimeShape& unextended_input_shape, - const integer_type* input_data, int32_t input_zero_point, - const RuntimeShape& unextended_output_shape, - integer_type* output_data, int32_t output_zero_point) { - // Current implementation only supports dimension equals 4 and simultaneous - // reduction over width and height. 
- TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); - TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - const int output_batch = output_shape.Dims(0); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int output_depth = output_shape.Dims(3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int num_elements_in_axis = input_width * input_height; - - TFLITE_CHECK_EQ(op_params.axis_count, 2); - TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1)); - TFLITE_CHECK_EQ(output_height, 1); - TFLITE_CHECK_EQ(output_width, 1); - - static constexpr int32_t kMinInt = std::numeric_limits::min(); - static constexpr int32_t kMaxInt = std::numeric_limits::max(); - - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - int32_t acc = 0; - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] - - input_zero_point; - } - } - acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); - acc = acc > 0 ? 
(acc + num_elements_in_axis / 2) / num_elements_in_axis - : (acc - num_elements_in_axis / 2) / num_elements_in_axis; - acc += output_zero_point; - acc = std::min(std::max(acc, kMinInt), kMaxInt); - output_data[Offset(output_shape, out_b, 0, 0, out_d)] = - static_cast(acc); - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h deleted file mode 100644 index 22e89740..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +++ /dev/null @@ -1,133 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_ - -#include - -#include "fixedpoint/fixedpoint.h" -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -template -inline void MulElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { - for (int i = 0; i < size; ++i) { - const int32_t input1_val = params.input1_offset + input1_data[i]; - const int32_t input2_val = params.input2_offset + input2_data[i]; - const int32_t unclamped_result = - params.output_offset + - MultiplyByQuantizedMultiplier(input1_val * input2_val, - params.output_multiplier, - params.output_shift); - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, unclamped_result)); - output_data[i] = static_cast(clamped_output); - } -} - -template -inline void Mul(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - ruy::profiler::ScopeLabel label("Mul/8bit"); - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - MulElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -// Mul with 16 bit inputs and int8_t outputs. 
-inline void Mul(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const int16_t* input1_data, - const RuntimeShape& input2_shape, const int16_t* input2_data, - const RuntimeShape& output_shape, int8_t* output_data) { - ruy::profiler::ScopeLabel label("Mul/Int16Int8"); - int32_t output_offset = params.output_offset; - int32_t output_activation_min = params.quantized_activation_min; - int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - for (int i = 0; i < flat_size; i++) { - // F0 uses 0 integer bits, range [-1, 1]. - using F0 = gemmlowp::FixedPoint; - - F0 unclamped_result = - F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); - int16_t rescaled_result = - gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); - int16_t clamped_result = std::min( - output_activation_max - output_offset, rescaled_result); - clamped_result = std::max(output_activation_min - output_offset, - clamped_result); - output_data[i] = output_offset + clamped_result; - } -} - -template -inline void BroadcastMul4DSlow( - const ArithmeticParams& params, const RuntimeShape& input1_shape, - const T* input1_data, const RuntimeShape& input2_shape, - const T* input2_data, const RuntimeShape& output_shape, T* output_data) { - ruy::profiler::ScopeLabel label("BroadcastMul4DSlow"); - - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - // The input shapes are extended as part of NdArrayDesc initialization. 
- NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); - - for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - const int32_t input1_val = - params.input1_offset + - input1_data[SubscriptToIndex(desc1, b, y, x, c)]; - const int32_t input2_val = - params.input2_offset + - input2_data[SubscriptToIndex(desc2, b, y, x, c)]; - const int32_t unclamped_result = - params.output_offset + - MultiplyByQuantizedMultiplier(input1_val * input2_val, - params.output_multiplier, - params.output_shift); - const int32_t clamped_output = std::min( - params.quantized_activation_max, - std::max(params.quantized_activation_min, unclamped_result)); - output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast(clamped_output); - } - } - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h deleted file mode 100644 index 4dc31d9e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +++ /dev/null @@ -1,264 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -inline bool AveragePool(const PoolParams& params, - const RuntimeShape& input_shape, - const int8_t* input_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region 
clamped so as to - // ensure that the filter window fits in the input array. - const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - int32_t acc = 0; - int filter_count = 0; - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - acc += - input_data[Offset(input_shape, batch, in_y, in_x, channel)]; - filter_count++; - } - } - if (filter_count == 0) return false; - // Round to the closest integer value. - acc = acc > 0 ? (acc + filter_count / 2) / filter_count - : (acc - filter_count / 2) / filter_count; - acc = std::max(acc, params.quantized_activation_min); - acc = std::min(acc, params.quantized_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(acc); - } - } - } - } - return true; -} - -inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& output_shape, - int8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - TFLITE_DCHECK_GE(params.quantized_activation_min, - std::numeric_limits::min()); - TFLITE_DCHECK_LE(params.quantized_activation_max, - std::numeric_limits::max()); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int 
output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region clamped so as to - // ensure that the filter window fits in the input array. - const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - int8_t max = std::numeric_limits::lowest(); - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - max = std::max( - max, - input_data[Offset(input_shape, batch, in_y, in_x, channel)]); - } - } - max = std::max(max, params.quantized_activation_min); - max = std::min(max, params.quantized_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(max); - } - } - } - } -} - -inline bool AveragePool(const PoolParams& params, - const RuntimeShape& input_shape, - const int16_t* input_data, - const RuntimeShape& output_shape, - int16_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = 
MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region clamped so as to - // ensure that the filter window fits in the input array. - const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - int32_t acc = 0; - int filter_count = 0; - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - acc += - input_data[Offset(input_shape, batch, in_y, in_x, channel)]; - filter_count++; - } - } - if (filter_count == 0) return false; - // Round to the closest integer value. - acc = acc > 0 ? 
(acc + filter_count / 2) / filter_count - : (acc - filter_count / 2) / filter_count; - acc = std::max(acc, params.quantized_activation_min); - acc = std::min(acc, params.quantized_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(acc); - } - } - } - } - return true; -} - -inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& output_shape, - int16_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - TFLITE_DCHECK_GE(params.quantized_activation_min, - std::numeric_limits::min()); - TFLITE_DCHECK_LE(params.quantized_activation_max, - std::numeric_limits::max()); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region clamped so as to - // ensure that the filter window fits in the input array. 
- const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - int16_t max = std::numeric_limits::lowest(); - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - max = std::max( - max, - input_data[Offset(input_shape, batch, in_y, in_x, channel)]); - } - } - max = std::max(max, params.quantized_activation_min); - max = std::min(max, params.quantized_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(max); - } - } - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h deleted file mode 100644 index 7b1e003b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ - -#include -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -inline void Tanh(int32_t input_zero_point, int32_t input_range_radius, - int32_t input_multiplier, int32_t input_shift, - const RuntimeShape& input_shape, const int8_t* input_data, - const RuntimeShape& output_shape, int8_t* output_data) { - // Integer bits must be in sync with Prepare() function. - static constexpr int32_t kInputIntegerBits = 4; - static constexpr int32_t kOutputScale = 7; - static constexpr int32_t kMinInt8 = std::numeric_limits::min(); - static constexpr int32_t kMaxInt8 = std::numeric_limits::max(); - using F4 = gemmlowp::FixedPoint; - - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; ++i) { - const int32_t input = - static_cast(input_data[i]) - input_zero_point; - if (input <= -input_range_radius) { - output_data[i] = kMinInt8; - } else if (input >= input_range_radius) { - output_data[i] = kMaxInt8; - } else { - const int32_t input_in_q4 = - MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift); - const int32_t output_in_q0 = - gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw(); - - // Rescale and downcast. 
- using gemmlowp::RoundingDivideByPOT; - int32_t output_in_q24 = - RoundingDivideByPOT(output_in_q0, 31 - kOutputScale); - output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8); - output_data[i] = static_cast(output_in_q24); - } - } -} - -inline void Tanh(int32_t input_multiplier, int32_t input_left_shift, - const RuntimeShape& input_shape, const int16_t* ptr_input_data, - const RuntimeShape& output_shape, int16_t* ptr_output_data) { - // We use the LUT for sigmoid and take into account, that - // tanh(x) = 2*sigmoid(2*x) - 1 - - // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7]. - // In case of general parameter scale, multiplier 3 is taken into account - // in TanhPrepare function and it is included in - // input_multiplier already. - - if (input_multiplier == 0) { // power of two case - input_multiplier = 3 << input_left_shift; - input_left_shift = 0; - } - - int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0; - - int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) { - int32_t input_data = - ((*ptr_input_data) * input_multiplier + round) >> input_left_shift; - - uint32_t abs_input_data = abs(input_data); - uint32_t uh = abs_input_data >> 8; - int32_t result; - - if (uh >= 255) { - // Saturate to maximum. - result = 0xFFFF << 8; - } else { - uint32_t ua = sigmoid_table_uint16[uh]; - uint32_t ub = sigmoid_table_uint16[uh + 1]; - - uint8_t ut = abs_input_data & 0xFF; - - result = (ua << 8) + ut * (ub - ua); - } - - result = (input_data >= 0) - ? (result - (1 << (14 + 9)) + (1 << (9 - 2))) - : (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1); - - // Convert back to 16-bit. 
- result >>= (9 - 1); - - *ptr_output_data = result; - } -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h deleted file mode 100644 index 92919a71..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h +++ /dev/null @@ -1,224 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_integer_ops { - -// Fixed-point per-channel-quantization transpose convolution reference kernel. 
-inline void TransposeConv( - const ConvParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - int8_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data, - int32_t* scratch_buffer) { - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - (void)im2col_data; // only used in optimized code. - (void)im2col_shape; // only used in optimized code. - - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int32_t input_offset = params.input_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_activation_min = std::numeric_limits::min(); - const int32_t output_activation_max = std::numeric_limits::max(); - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - - const int num_elements = output_shape.FlatSize(); - // We need to initialize scratch_buffer to all 0s, as we apply the same - // 'scatter' based trick as in float version. 
- memset(scratch_buffer, 0, num_elements * sizeof(int32_t)); - - // Loop through input elements one at a time. - for (int batch = 0; batch < batches; ++batch) { - for (int in_y = 0; in_y < input_height; ++in_y) { - for (int in_x = 0; in_x < input_width; ++in_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - // Loop through the output elements it will influence. - const int out_x_origin = (in_x * stride_width) - pad_width; - const int out_y_origin = (in_y * stride_height) - pad_height; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int out_channel = 0; out_channel < output_depth; - ++out_channel) { - // Compute output element location. - const int out_x = out_x_origin + filter_x; - const int out_y = out_y_origin + filter_y; - // We cannot accumulate out of bounds. - if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && - (out_y < output_height)) { - const int8_t input_value = input_data[Offset( - input_shape, batch, in_y, in_x, in_channel)]; - const int8_t filter_value = - filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; - scratch_buffer[Offset(output_shape, batch, out_y, out_x, - out_channel)] += - (input_value + input_offset) * filter_value; - } - } - } - } - } - } - } - } - - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x, - out_channel)]; - if (bias_data) { - acc += bias_data[out_channel]; - } - acc = MultiplyByQuantizedMultiplier( - acc, output_multiplier[out_channel], output_shift[out_channel]); - acc += output_offset; - acc = std::max(acc, output_activation_min); - acc = std::min(acc, output_activation_max); - output_data[Offset(output_shape, batch, 
out_y, out_x, out_channel)] = - static_cast(acc); - } - } - } - } -} - -// int16_t input (zero_point=0), int8_t filter, int32 or int64 accumulator -template -inline void TransposeConv( - const ConvParams& params, const int32_t* output_multiplier, - const int32_t* output_shift, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& filter_shape, - const int8_t* filter_data, const RuntimeShape& bias_shape, - const Scalar* bias_data, const RuntimeShape& output_shape, - int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data, - Scalar* scratch_buffer) { - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - (void)im2col_data; // only used in optimized code. - (void)im2col_shape; // only used in optimized code. 
- - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int32_t output_activation_min = std::numeric_limits::min(); - const int32_t output_activation_max = std::numeric_limits::max(); - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - - const int num_elements = output_shape.FlatSize(); - // We need to initialize scratch_buffer to all 0s, as we apply the same - // 'scatter' based trick as in float version. - memset(scratch_buffer, 0, num_elements * sizeof(Scalar)); - - // Loop through input elements one at a time. - for (int batch = 0; batch < batches; ++batch) { - for (int in_y = 0; in_y < input_height; ++in_y) { - for (int in_x = 0; in_x < input_width; ++in_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - // Loop through the output elements it will influence. - const int out_x_origin = (in_x * stride_width) - pad_width; - const int out_y_origin = (in_y * stride_height) - pad_height; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int out_channel = 0; out_channel < output_depth; - ++out_channel) { - // Compute output element location. - const int out_x = out_x_origin + filter_x; - const int out_y = out_y_origin + filter_y; - // We cannot accumulate out of bounds. 
- if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && - (out_y < output_height)) { - const int32_t input_value = input_data[Offset( - input_shape, batch, in_y, in_x, in_channel)]; - const int32_t filter_value = - filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; - scratch_buffer[Offset(output_shape, batch, out_y, out_x, - out_channel)] += - input_value * filter_value; - } - } - } - } - } - } - } - } - - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - Scalar acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x, - out_channel)]; - if (bias_data) { - acc += bias_data[out_channel]; - } - int32_t scaled_acc = MultiplyByQuantizedMultiplier( - acc, output_multiplier[out_channel], output_shift[out_channel]); - scaled_acc = std::max(scaled_acc, output_activation_min); - scaled_acc = std::min(scaled_acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - static_cast(scaled_acc); - } - } - } - } -} - -} // namespace reference_integer_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/l2normalization.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/l2normalization.h deleted file mode 100644 index 7587d2b5..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/l2normalization.h +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ - -#include -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline void L2Normalization(const tflite::L2NormalizationParams& op_params, - const RuntimeShape& input_shape, - const float* input_data, - const RuntimeShape& output_shape, - float* output_data, float epsilon = 1e-6) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - for (int i = 0; i < outer_size; ++i) { - float squared_l2_norm = 0; - for (int c = 0; c < depth; ++c) { - const float val = input_data[depth * i + c]; - squared_l2_norm += val * val; - } - float l2_norm = std::sqrt(squared_l2_norm); - l2_norm = std::max(l2_norm, epsilon); - for (int c = 0; c < depth; ++c) { - output_data[depth * i + c] = input_data[depth * i + c] / l2_norm; - } - } -} - -inline void L2Normalization(const tflite::L2NormalizationParams& op_params, - const RuntimeShape& input_shape, - const uint8_t* input_data, - const RuntimeShape& output_shape, - uint8_t* output_data) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int depth = - 
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int32_t input_zero_point = op_params.input_zero_point; - - for (int i = 0; i < outer_size; ++i) { - int32_t square_l2_norm = 0; - for (int c = 0; c < depth; c++) { - int32_t diff = input_data[depth * i + c] - input_zero_point; - square_l2_norm += diff * diff; - } - int32_t inv_l2norm_multiplier; - int inv_l2norm_shift; - GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift, - &inv_l2norm_multiplier, &inv_l2norm_shift); - for (int c = 0; c < depth; c++) { - int32_t diff = input_data[depth * i + c] - input_zero_point; - int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( - 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); - int32_t unclamped_output_val = 128 + rescaled_diff; - int32_t output_val = - std::min(static_cast(255), - std::max(static_cast(0), unclamped_output_val)); - output_data[depth * i + c] = static_cast(output_val); - } - } -} - -} // namespace reference_ops -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/leaky_relu.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/leaky_relu.h deleted file mode 100644 index 06f691ab..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/leaky_relu.h +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_ops { - -inline void LeakyRelu(const tflite::LeakyReluParams& params, - const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - const float val = input_data[i]; - // Note that alpha might be > 1 or < 0, so we don't use std::max here. - output_data[i] = val > 0 ? 
val : val * params.alpha; - } -} - -template -inline void QuantizeLeakyRelu(const LeakyReluParams& params, - const RuntimeShape& input_shape, - const T* input_data, - const RuntimeShape& output_shape, - T* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - static const int32_t quantized_min = std::numeric_limits::min(); - static const int32_t quantized_max = std::numeric_limits::max(); - for (int i = 0; i < flat_size; ++i) { - const int32_t input_value = input_data[i] - params.input_offset; - int32_t unclamped_output; - if (input_value >= 0) { - unclamped_output = params.output_offset + - MultiplyByQuantizedMultiplier( - input_value, params.output_multiplier_identity, - params.output_shift_identity); - } else { - unclamped_output = params.output_offset + - MultiplyByQuantizedMultiplier( - input_value, params.output_multiplier_alpha, - params.output_shift_alpha); - } - const T clamped_output = - std::min(quantized_max, std::max(quantized_min, unclamped_output)); - output_data[i] = static_cast(clamped_output); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LEAKY_RELU_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/log_softmax.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/log_softmax.h deleted file mode 100644 index 394dd3a9..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/log_softmax.h +++ /dev/null @@ -1,256 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_ - -#include -#include -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_ops { - -inline void LogSoftmax(const SoftmaxParams& params, - const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - - for (int i = 0; i < outer_size; ++i) { - // Find max element value which we'll use to ensure numerical stability - // taking advantage of the following equality: - // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C))) - float max = std::numeric_limits::lowest(); - for (int c = 0; c < depth; ++c) { - max = std::max(max, input_data[i * depth + c]); - } - - // Compute sum. - float sum = 0.f; - for (int c = 0; c < depth; ++c) { - sum += std::exp(input_data[i * depth + c] - max); - } - - // Compute result. 
- const float log_sum = std::log(sum); - for (int c = 0; c < depth; ++c) { - output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum; - } - } -} - -inline void LogSoftmax(const SoftmaxParams& params, - const RuntimeShape& input_shape, - const uint8_t* input_data, - const RuntimeShape& output_shape, uint8_t* output_data) { - const int32_t input_multiplier = params.input_multiplier; - const int32_t input_left_shift = params.input_left_shift; - const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor; - const int32_t reverse_scaling_right_shift = - params.reverse_scaling_right_shift; - const int diff_min = params.diff_min; - // The representation chosen for the input to the exp() function is Q5.26. - // We need to leave extra space since values that we skip might be as large - // as -32 before multiplying by input_beta_multiplier, and therefore as - // large as -16 afterwards. Note that exp(-8) is definitely not - // insignificant to accumulation, but exp(-16) definitely is. 
- static constexpr int kScaledDiffIntegerBits = 5; - static constexpr int kAccumulationIntegerBits = 12; - static constexpr int kOutputIntegerBits = 4; - using FixedPointScaledDiff = - gemmlowp::FixedPoint; - using FixedPointAccum = - gemmlowp::FixedPoint; - - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - - for (int i = 0; i < outer_size; ++i) { - uint8_t max_in_row = 0; - for (int c = 0; c < depth; ++c) { - max_in_row = std::max(max_in_row, input_data[i * depth + c]); - } - - FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); - for (int c = 0; c < depth; ++c) { - int32_t input_diff = - static_cast(input_data[i * depth + c]) - max_in_row; - if (input_diff >= diff_min) { - const int32_t input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_multiplier, input_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - sum_of_exps = sum_of_exps + gemmlowp::Rescale( - exp_on_negative_values(scaled_diff_f8)); - } - } - - const int32_t fixed_log_sum_of_exps = - log_x_for_x_greater_than_or_equal_to_1( - sum_of_exps) - .raw(); - - // rescaled_diff_min is smallest representable in - // Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the - // log-sub-exps that will be subtracted in the loop. - // - // The thresholds diff_min, etc are negative. - const int rescaled_diff_min = - fixed_log_sum_of_exps + std::numeric_limits::lowest(); - const int adjusted_diff_min = - std::max(static_cast( - diff_min - 1), // Note use of > below instead of >= above. 
- MultiplyByQuantizedMultiplierSmallerThanOneExp( - rescaled_diff_min, reverse_scaling_divisor, - -reverse_scaling_right_shift)); - - for (int c = 0; c < depth; ++c) { - int32_t input_diff = - static_cast(input_data[i * depth + c]) - max_in_row; - if (input_diff > adjusted_diff_min) { - const int32_t input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_multiplier, input_left_shift); - int32_t unsat_output = - gemmlowp::RoundingDivideByPOT( - (input_diff_rescaled - fixed_log_sum_of_exps), - 31 - kScaledDiffIntegerBits - kOutputIntegerBits) + - 255; - - output_data[i * depth + c] = static_cast( - std::max(std::min(unsat_output, static_cast(255)), - static_cast(0))); - } else { - // Set output to smallest value. - output_data[i * depth + c] = 0; - } - } - } -} - -template -inline void LogSoftmaxQuantized(const SoftmaxParams& params, - const size_t outer_size, const size_t depth, - const RuntimeShape& input_shape, - const T* input_data, - const RuntimeShape& output_shape, - T* output_data) { - const int32_t input_multiplier = params.input_multiplier; - const int32_t input_left_shift = params.input_left_shift; - const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor; - const int32_t reverse_scaling_right_shift = - params.reverse_scaling_right_shift; - const int diff_min = params.diff_min; - - static constexpr T kMinT8 = std::numeric_limits::min(); - static constexpr T kMaxT8 = std::numeric_limits::max(); - static constexpr int32_t kMinInt32 = std::numeric_limits::min(); - - // All IntegerBits must agree with Prepare function. - // Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible. 
- static constexpr int kInputIntegerBits = 5; - static constexpr int kAccumulationIntegerBits = 12; - static constexpr int kOutputIntegerBits = 4; - using F5 = gemmlowp::FixedPoint; - using F12 = gemmlowp::FixedPoint; - - for (size_t outer_index = 0; outer_index < outer_size; ++outer_index) { - T max_in_row = kMinT8; - for (size_t inner_index = 0; inner_index < depth; ++inner_index) { - max_in_row = - std::max(max_in_row, input_data[outer_index * depth + inner_index]); - } - - // Accumulator "sum_of_exps_in_q12" is safe from overflowing in 2^12 steps. - F12 sum_of_exps_in_q12 = F12::FromRaw(0); - for (size_t inner_index = 0; inner_index < depth; ++inner_index) { - int32_t input_diff = - static_cast(input_data[outer_index * depth + inner_index]) - - max_in_row; - if (input_diff >= diff_min) { - const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier( - input_diff, input_multiplier, input_left_shift); - sum_of_exps_in_q12 = - sum_of_exps_in_q12 + - gemmlowp::Rescale( - exp_on_negative_values(F5::FromRaw(input_diff_in_q5))); - } - } - - const int32_t log_sum_of_exps_in_q5 = - log_x_for_x_greater_than_or_equal_to_1( - sum_of_exps_in_q12) - .raw(); - - // Potentially reduced the valid range. shifted_log_sum_of_exps_in_q5 is - // smallest representable in Q5.26 plus the log_sum_of_exps. - const int32_t shifted_log_sum_of_exps_in_q5 = - log_sum_of_exps_in_q5 + kMinInt32; - const int32_t adjusted_diff_min = - std::max(static_cast(diff_min - 1), - MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5, - reverse_scaling_divisor, - -reverse_scaling_right_shift)); - - for (size_t inner_index = 0; inner_index < depth; ++inner_index) { - int32_t input_diff = - static_cast(input_data[outer_index * depth + inner_index]) - - max_in_row; - // Note use of > below instead of >= above. - if (input_diff > adjusted_diff_min) { - const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier( - input_diff, input_multiplier, input_left_shift); - - // Rescale and downcast. 
- int32_t output_in_q27 = - gemmlowp::RoundingDivideByPOT( - (input_diff_in_q5 - log_sum_of_exps_in_q5), - 31 - kInputIntegerBits - kOutputIntegerBits) + - kMaxT8; - - output_in_q27 = - std::max(std::min(output_in_q27, static_cast(kMaxT8)), - static_cast(kMinT8)); - output_data[outer_index * depth + inner_index] = - static_cast(output_in_q27); - } else { - output_data[outer_index * depth + inner_index] = kMinT8; - } - } - } -} - -inline void LogSoftmax(const SoftmaxParams& params, const size_t outer_size, - const size_t depth, const RuntimeShape& input_shape, - const int8_t* input_data, - const RuntimeShape& output_shape, int8_t* output_data) { - LogSoftmaxQuantized(params, outer_size, depth, input_shape, input_data, - output_shape, output_data); -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/logistic.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/logistic.h deleted file mode 100644 index 64b7133b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/logistic.h +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_ - -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/op_macros.h" - -namespace tflite { -namespace reference_ops { - -inline void Logistic(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const float cutoff_upper = 16.619047164916992188f; - const float cutoff_lower = -9.f; - - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - // Rational for using approximation in reference kernel. - // 0. This approximation gives enough precision for float. - // 1. This works around an issue on an embedded chipset where exp() does not - // return correctly as expected - exp(x) should return inf when overflown - // not 1.701417 IEEE 754 defines representation for inf. - // 2. This will speed up calculation and is matching the behavior in the - // optimized kernels. (check the definition of scalar_logistic_op) - - for (int i = 0; i < flat_size; i++) { - float val = input_data[i]; - float result; - if (val > cutoff_upper) { - result = 1.0f; - } else if (val < cutoff_lower) { - result = std::exp(val); - } else { - result = 1.f / (1.f + std::exp(-val)); - } - output_data[i] = result; - } -} - -// Convenience version that allows, for example, generated-code calls to be -// uniform between data types. -inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& output_shape, - float* output_data) { - // Drop params: not needed. 
- Logistic(input_shape, input_data, output_shape, output_data); -} - -inline void Logistic(const LogisticParams& params, - const RuntimeShape& input_shape, const int16_t* input_data, - const RuntimeShape& output_shape, int16_t* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; i++) { - // F0 uses 0 integer bits, range [-1, 1]. - // This is the return type of math functions such as tanh, logistic, - // whose range is in [-1, 1]. - using F0 = gemmlowp::FixedPoint; - // F3 uses 3 integer bits, range [-8, 8], the input range expected here. - using F3 = gemmlowp::FixedPoint; - - const F3 input = F3::FromRaw(input_data[i]); - F0 output = gemmlowp::logistic(input); - output_data[i] = output.raw(); - } -} - -// Quantized int8_t logistic activation. Cheats by dequantizing and -// requantizing around the floating point logistic method. This implementation -// is slow on platforms without a floating point unit. - -// TODO(b/141211002): Delete this int8_t implementation once we can reuse the -// approach used in TFLite for int8_t Logistic. -inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data, - float input_scale, int input_zero_point, - const RuntimeShape& output_shape, int8_t* output_data, - float output_scale, int output_zero_point) { - const float cutoff_upper = 16.619047164916992188f; - const float cutoff_lower = -9.f; - - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - // Rational for using approximation in reference kernel. - // 0. This approximation gives enough precision for float. - // 1. This works around an issue on an embedded chipset where exp() does not - // return correctly as expected - exp(x) should return inf when overflown - // not 1.701417 IEEE 754 defines representation for inf. - // 2. This will speed up calculation and is matching the behavior in the - // optimized kernels. 
(check the definition of scalar_logistic_op) - - for (int i = 0; i < flat_size; i++) { - // Dequantize. - float val = - static_cast((input_data[i] - input_zero_point) * input_scale); - float result; - if (val > cutoff_upper) { - result = 1.0f; - } else if (val < cutoff_lower) { - result = std::exp(val); - } else { - result = 1.f / (1.f + std::exp(-val)); - } - // Requantize - int8_t output = - static_cast(result / output_scale + output_zero_point); - output_data[i] = output; - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/lstm_cell.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/lstm_cell.h deleted file mode 100644 index 17b113eb..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/lstm_cell.h +++ /dev/null @@ -1,422 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_ - -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/reference/concatenation.h" -#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -inline void LstmCell( - const LstmCellParams& params, const RuntimeShape& unextended_input_shape, - const float* input_data, const RuntimeShape& unextended_prev_activ_shape, - const float* prev_activ_data, const RuntimeShape& weights_shape, - const float* weights_data, const RuntimeShape& unextended_bias_shape, - const float* bias_data, const RuntimeShape& unextended_prev_state_shape, - const float* prev_state_data, - const RuntimeShape& unextended_output_state_shape, float* output_state_data, - const RuntimeShape& unextended_output_activ_shape, float* output_activ_data, - const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data, - const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) { - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape prev_activ_shape = - 
RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); - const RuntimeShape bias_shape = - RuntimeShape::ExtendedShape(4, unextended_bias_shape); - const RuntimeShape prev_state_shape = - RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); - const RuntimeShape output_state_shape = - RuntimeShape::ExtendedShape(4, unextended_output_state_shape); - const RuntimeShape output_activ_shape = - RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); - const RuntimeShape concat_temp_shape = - RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); - const RuntimeShape activ_temp_shape = - RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); - TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); - - const int weights_dim_count = weights_shape.DimensionsCount(); - const int batches = - MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0, - output_state_shape, 0, output_activ_shape, 0); - const int height = - MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1, - output_state_shape, 1, output_activ_shape, 1); - const int width = - MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2, - output_state_shape, 2, output_activ_shape, 2); - const int input_depth = input_shape.Dims(3); - const int prev_activ_depth = prev_activ_shape.Dims(3); - const int total_input_depth = prev_activ_depth + input_depth; - TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), - total_input_depth); - TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); - const int intern_activ_depth = - MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); - TFLITE_DCHECK_EQ(weights_shape.FlatSize(), - intern_activ_depth * total_input_depth); - TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); - const int output_depth = - MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, - 3, output_activ_shape, 3); - TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); - - // Concatenate prev_activ and input 
data together - float const* concat_input_arrays_data[2] = {input_data, prev_activ_data}; - const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape, - &prev_activ_shape}; - tflite::ConcatenationParams concat_params; - concat_params.axis = 3; - concat_params.inputs_count = 2; - Concatenation(concat_params, concat_input_arrays_shapes, - concat_input_arrays_data, concat_temp_shape, concat_temp_data); - - // Fully connected - tflite::FullyConnectedParams fc_params; - fc_params.float_activation_min = std::numeric_limits::lowest(); - fc_params.float_activation_max = std::numeric_limits::max(); - FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, - weights_data, bias_shape, bias_data, activ_temp_shape, - activ_temp_data); - - // Memory state update (the LSTM "guts") - for (int b = 0; b < batches; ++b) { - for (int w = 0; w < width; ++w) { - for (int h = 0; h < height; ++h) { - for (int c = 0; c < output_depth; ++c) { - const float input_gate = - 1.f / - (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, - 0 * output_depth + c)])); - const float new_input = std::tanh(activ_temp_data[Offset( - activ_temp_shape, b, h, w, 1 * output_depth + c)]); - const float forget_gate = - 1.f / - (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, - 2 * output_depth + c)])); - const float output_gate = - 1.f / - (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, - 3 * output_depth + c)])); - const float new_state = - input_gate * new_input + - forget_gate * - prev_state_data[Offset(prev_state_shape, b, h, w, c)]; - output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state; - output_activ_data[Offset(output_activ_shape, b, h, w, c)] = - output_gate * std::tanh(new_state); - } - } - } - } -} - -// Quantized LSTM cell implementation. -// The quantization of the input, output arrays is as follows: -// - The input activations are quantized as uint8 on the interval -// [-1, 127/128]. 
-// The rationale for that is that is the natural interval for output -// activations (see next point) and these need to be concatenated together. -// We could accommodate different ranges by re-scaling, but we empirically -// found that setting the input activations range to be [-1, 127/128] in the -// first place, removing the need for re-scaling, greatly improves accuracy. -// - The output activations are quantized as uint8 on the interval -// [-1, 127/128]. -// The rationale for that is that the definition of a LSTM cell makes them -// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128] -// makes for simpler, more accurate fixed-point arithmetic. -// - The output-at-previous-timestep state array is obviously quantized as -// the output activations. -// - The internal LSTM memory (not the output-at-previous-timestep, the other -// internal state array) is int16-quantized and may use any power-of-two, -// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call -// StateIntegerBits below, see the below discussion of that template -// parameter ("The StateIntegerBits template parameter"). -// - The output of the internal fully-connected node is int16-quantized -// on the interval [-8, 8 * 32767/32768], the rationale for which is -// explained just below ("Why [-8, 8] for fully-connected output?"). -// -// -// === The StateIntegerBits template parameter === -// -// The StateIntegerBits template parameter controls the fixed-point format used -// to represent the internal memory of the LSTM cell (not the -// output-at-previous-timestep, the other internal state array). It's currently -// a template parameter so that the model can control that. The most typical -// value for StateIntegerBits is 4. Other plausible values are anywhere between -// 3 and 5. We might eventually standardize on a single supported value, e.g. 4, -// and drop that template parameter. 
The reason why it can't be a runtime -// parameter is that this controls the fixed-point format used, i.e. we need to -// generate actually different code based on it. In particular, we generate code -// for a fixed-point tanh() implementation for that format, which internally -// uses a fixed-point exp() implementation, which internally uses a -// barrel-shifter with a number of steps that depends on StateIntegerBits. -// Another consequence of that is that a higher value of StateIntegerBits -// results in a more expensive implementation (more barrel shifter steps -// needed). -// -// -// === Why [-8, 8] for fully-connected output? === -// -// This array is only fed to Logistic and Tanh functions, for which -// the quantized implementation will want to use fixed-point arithmetic, -// requiring a power-of-two representation interval. Thus, we should right -// away quantize this array to a power-of-two interval; otherwise, -// implementation will need to rescale that, losing any benefit that a tighter -// representation interval might otherwise yield, while introducing some -// numerical error and computational overhead. -// -// Now, Logistic and Tanh -// are nearly constant (nearly equal to their horizontal asymptotes) -// outside of a small bounded interval around 0: -// -// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4 -// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7 -// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14 -// -// From this, we see that clamping to [-4, 4] would be too inaccurate -// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision) -// while clamping to [-16, 16] would make no difference even in float32. -// However, for a fixed-point implementation in 16-bit integers, using 5 -// integer bits to represent the [-16, 16] range would leave only 11 -// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive -// representable values. 
Notice that is higher than the -// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic. -// Using [-8, 8] thus seems like the better compromise overall, enjoying -// an increment of 2.4e-4 between representable values and a worst-case -// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with -// [-16, 16]. -// -// Moreover, all other things being equal, it is nice to choose the narrower -// representation range, as that makes the implementation of fixed-point -// math functions a little cheaper (each integer bit requires an additional -// barrel-shifter atep in the implementation of exp(-x)). That is further -// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make -// sense for 32-bit float or 32-bit fixed-point quantization, but we are -// aiming for 16-bit fixed-point quantization of these internal nodes here. -// -template -inline void LstmCell(const LstmCellParams& params, - const RuntimeShape& unextended_input_shape, - const uint8_t* input_data_uint8, - const RuntimeShape& unextended_prev_activ_shape, - const uint8_t* prev_activ_data_uint8, - const RuntimeShape& weights_shape, - const uint8_t* weights_data_uint8, - const RuntimeShape& unextended_bias_shape, - const int32_t* bias_data_int32, - const RuntimeShape& unextended_prev_state_shape, - const int16_t* prev_state_data_int16, - const RuntimeShape& unextended_output_state_shape, - int16_t* output_state_data_int16, - const RuntimeShape& unextended_output_activ_shape, - uint8_t* output_activ_data_uint8, - const RuntimeShape& unextended_concat_temp_shape, - uint8_t* concat_temp_data_uint8, - const RuntimeShape& unextended_activ_temp_shape, - int16_t* activ_temp_data_int16, void* gemmlowp_context) { - (void)gemmlowp_context; // only used in optimized code. 
- int32_t weights_zero_point = params.weights_zero_point; - int32_t accum_multiplier = params.accum_multiplier; - int accum_shift = params.accum_shift; - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape prev_activ_shape = - RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape); - const RuntimeShape bias_shape = - RuntimeShape::ExtendedShape(4, unextended_bias_shape); - const RuntimeShape prev_state_shape = - RuntimeShape::ExtendedShape(4, unextended_prev_state_shape); - const RuntimeShape output_state_shape = - RuntimeShape::ExtendedShape(4, unextended_output_state_shape); - const RuntimeShape output_activ_shape = - RuntimeShape::ExtendedShape(4, unextended_output_activ_shape); - const RuntimeShape concat_temp_shape = - RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape); - const RuntimeShape activ_temp_shape = - RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape); - TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2); - - // Gather dimensions information, and perform consistency checks. 
- const int weights_dim_count = weights_shape.DimensionsCount(); - const int outer_size = MatchingFlatSizeSkipDim( - input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape, - output_activ_shape); - const int input_depth = input_shape.Dims(3); - const int prev_activ_depth = prev_activ_shape.Dims(3); - const int total_input_depth = prev_activ_depth + input_depth; - TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), - total_input_depth); - const int intern_activ_depth = - MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3); - TFLITE_DCHECK_EQ(weights_shape.FlatSize(), - intern_activ_depth * total_input_depth); - TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1); - TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0); - const int output_depth = - MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape, - 3, output_activ_shape, 3); - TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4); - const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3); - const int fc_output_depth = - MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3); - const int fc_accum_depth = total_input_depth; - TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth); - - // Depth-concatenate prev_activ and input data together. - uint8_t const* concat_input_arrays_data[2] = {input_data_uint8, - prev_activ_data_uint8}; - const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape, - &prev_activ_shape}; - tflite::ConcatenationParams concat_params; - concat_params.axis = 3; - concat_params.inputs_count = 2; - Concatenation(concat_params, concat_input_arrays_shapes, - concat_input_arrays_data, concat_temp_shape, - concat_temp_data_uint8); - - // Implementation of the fully connected node inside the LSTM cell. - // The operands are 8-bit integers, the accumulators are internally 32bit - // integers, and the output is 16-bit fixed-point with 3 integer bits so - // the output range is [-2^3, 2^3] == [-8, 8]. 
The rationale for that - // is explained in the function comment above. - for (int b = 0; b < fc_batches; ++b) { - for (int out_c = 0; out_c < fc_output_depth; ++out_c) { - // Internal accumulation. - // Initialize accumulator with the bias-value. - int32_t accum = bias_data_int32[out_c]; - // Accumulation loop. - for (int d = 0; d < fc_accum_depth; ++d) { - int16_t input_val = - concat_temp_data_uint8[b * fc_accum_depth + d] - 128; - int16_t weights_val = - weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point; - accum += input_val * weights_val; - } - // Down-scale the final int32 accumulator to the scale used by our - // (16-bit, using 3 integer bits) fixed-point format. The quantized - // multiplier and shift here have been pre-computed offline - // (e.g. by toco). - accum = - MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift); - // Saturate, cast to int16, and store to the temporary activations array. - accum = std::max(-32768, std::min(32767, accum)); - activ_temp_data_int16[out_c + fc_output_depth * b] = accum; - } - } - - // Rest of the LSTM cell: tanh and logistic math functions, and some adds - // and muls, all done in 16-bit fixed-point. - for (int b = 0; b < outer_size; ++b) { - for (int c = 0; c < output_depth; ++c) { - // Define the fixed-point data types that we will use here. All use - // int16 as the underlying integer type i.e. all are 16-bit fixed-point. - // They only differ by the number of integral vs. fractional bits, - // determining the range of values that they can represent. - // - // F0 uses 0 integer bits, range [-1, 1]. - // This is the return type of math functions such as tanh, logistic, - // whose range is in [-1, 1]. - using F0 = gemmlowp::FixedPoint; - // F3 uses 3 integer bits, range [-8, 8]. - // This is the range of the previous fully-connected node's output, - // which is our input here. 
- using F3 = gemmlowp::FixedPoint; - // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits, - // 2^StateIntegerBits]. It's used to represent the internal state, whose - // number of integer bits is currently dictated by the model. See comment - // on the StateIntegerBits template parameter above. - using FS = gemmlowp::FixedPoint; - // Implementation of input gate, using fixed-point logistic function. - F3 input_gate_input = F3::FromRaw( - activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]); - F0 input_gate_output = gemmlowp::logistic(input_gate_input); - // Implementation of input modulation gate, using fixed-point tanh - // function. - F3 input_modulation_gate_input = F3::FromRaw( - activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]); - F0 input_modulation_gate_output = - gemmlowp::tanh(input_modulation_gate_input); - // Implementation of forget gate, using fixed-point logistic function. - F3 forget_gate_input = F3::FromRaw( - activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]); - F0 forget_gate_output = gemmlowp::logistic(forget_gate_input); - // Implementation of output gate, using fixed-point logistic function. - F3 output_gate_input = F3::FromRaw( - activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]); - F0 output_gate_output = gemmlowp::logistic(output_gate_input); - // Implementation of internal multiplication nodes, still in fixed-point. - F0 input_times_input_modulation = - input_gate_output * input_modulation_gate_output; - FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]); - FS prev_state_times_forget_state = forget_gate_output * prev_state; - // Implementation of internal addition node, saturating. - FS new_state = gemmlowp::SaturatingAdd( - gemmlowp::Rescale(input_times_input_modulation), - prev_state_times_forget_state); - // Implementation of last internal Tanh node, still in fixed-point. 
- // Since a Tanh fixed-point implementation is specialized for a given - // number or integer bits, and each specialization can have a substantial - // code size, and we already used above a Tanh on an input with 3 integer - // bits, and per the table in the above function comment there is no - // significant accuracy to be lost by clamping to [-8, +8] for a - // 3-integer-bits representation, let us just do that. This helps people - // porting this to targets where code footprint must be minimized. - F3 new_state_f3 = gemmlowp::Rescale<3>(new_state); - F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3); - // Store the new internal state back to memory, as 16-bit integers. - // Note: here we store the original value with StateIntegerBits, not - // the rescaled 3-integer-bits value fed to tanh. - output_state_data_int16[b * output_depth + c] = new_state.raw(); - // Down-scale the output activations to 8-bit integers, saturating, - // and store back to memory. - int16_t rescaled_output_activ = - gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8); - int16_t clamped_output_activ = std::max( - -128, std::min(127, rescaled_output_activ)); - output_activ_data_uint8[b * output_depth + c] = - 128 + clamped_output_activ; - } - } -} - -} // namespace reference_ops -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h deleted file mode 100644 index cd11b419..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/maximum_minimum.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape, - const T* input1_data, - const RuntimeShape& unextended_input2_shape, - const T* input2_data, - const RuntimeShape& unextended_output_shape, - T* output_data, Op op) { - // Uses element-wise calculation if broadcast is not required. 
- if (unextended_input1_shape == unextended_input2_shape) { - const int flat_size = - MatchingElementsSize(unextended_input1_shape, unextended_input2_shape, - unextended_output_shape); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = op(input1_data[i], input2_data[i]); - } - } else { - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N); - - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast( - unextended_input1_shape, unextended_input2_shape, &desc1, &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), - &output_desc); - - auto maxmin_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - op(input1_data[SubscriptToIndex(desc1, indexes)], - input2_data[SubscriptToIndex(desc2, indexes)]); - }; - NDOpsHelper(output_desc, maxmin_func); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/neg.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/neg.h deleted file mode 100644 index e127883f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/neg.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_ - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -template -inline void Negate(const RuntimeShape& input_shape, const T* input_data, - const RuntimeShape& output_shape, T* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; ++i) { - output_data[i] = -input_data[i]; - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pad.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pad.h deleted file mode 100644 index 27589445..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pad.h +++ /dev/null @@ -1,169 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_ - -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -// TFLite Pad supports activation tensors with up to 5 dimensions. -constexpr int PadKernelMaxDimensionCount() { return 5; } - -// There are two versions of pad: Pad and PadV2. In PadV2 there is a second -// scalar input that provides the padding value. Therefore pad_value_ptr can be -// equivalent to a simple input1_data. For Pad, it should point to a zero -// value. -// -// Note that two typenames are required, so that T=P=int32_t is considered a -// specialization distinct from P=int32_t. -template -inline void PadImpl(const tflite::PadParams& op_params, - const RuntimeShape& input_shape, const T* input_data, - const P* pad_value_ptr, const RuntimeShape& output_shape, - T* output_data) { - const RuntimeShape ext_input_shape = - RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape); - const RuntimeShape ext_output_shape = - RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape); - TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount()); - TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount()); - - // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can - // pad them to 5 dims (yes, we are "padding the padding"). 
- int left_padding_copy[PadKernelMaxDimensionCount()]; - for (int i = 0; i < PadKernelMaxDimensionCount(); i++) { - left_padding_copy[i] = 0; - } - for (int i = 0; i < op_params.left_padding_count; ++i) { - left_padding_copy[i + PadKernelMaxDimensionCount() - - op_params.left_padding_count] = op_params.left_padding[i]; - } - int right_padding_copy[PadKernelMaxDimensionCount()]; - for (int i = 0; i < PadKernelMaxDimensionCount(); i++) { - right_padding_copy[i] = 0; - } - for (int i = 0; i < op_params.right_padding_count; ++i) { - right_padding_copy[i + PadKernelMaxDimensionCount() - - op_params.right_padding_count] = - op_params.right_padding[i]; - } - - const int output_batch = ext_output_shape.Dims(0); - const int output_plane = ext_output_shape.Dims(1); - const int output_height = ext_output_shape.Dims(2); - const int output_width = ext_output_shape.Dims(3); - const int output_depth = ext_output_shape.Dims(4); - - const int left_b_padding = left_padding_copy[0]; - const int left_p_padding = left_padding_copy[1]; - const int left_h_padding = left_padding_copy[2]; - const int left_w_padding = left_padding_copy[3]; - const int left_d_padding = left_padding_copy[4]; - - const int right_b_padding = right_padding_copy[0]; - const int right_p_padding = right_padding_copy[1]; - const int right_h_padding = right_padding_copy[2]; - const int right_w_padding = right_padding_copy[3]; - const int right_d_padding = right_padding_copy[4]; - - const T pad_value = *pad_value_ptr; - - const T* in_ptr = input_data; - T* out_ptr = output_data; - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_p = 0; out_p < output_plane; ++out_p) { - for (int out_h = 0; out_h < output_height; ++out_h) { - for (int out_w = 0; out_w < output_width; ++out_w) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - if (out_b < left_b_padding || - out_b >= output_batch - right_b_padding || - out_p < left_p_padding || - out_p >= output_plane - right_p_padding || - out_h < 
left_h_padding || - out_h >= output_height - right_h_padding || - out_w < left_w_padding || - out_w >= output_width - right_w_padding || - out_d < left_d_padding || - out_d >= output_depth - right_d_padding) { - *out_ptr++ = pad_value; - } else { - *out_ptr++ = *in_ptr++; - } - } - } - } - } - } -} - -template -inline void Pad(const tflite::PadParams& op_params, - const RuntimeShape& input_shape, const T* input_data, - const P* pad_value_ptr, const RuntimeShape& output_shape, - T* output_data) { - PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape, - output_data); -} - -// The second (pad-value) input can be int32_t when, say, the first is uint8_t. -template -inline void Pad(const tflite::PadParams& op_params, - const RuntimeShape& input_shape, const T* input_data, - const int32_t* pad_value_ptr, const RuntimeShape& output_shape, - T* output_data) { - const T converted_pad_value = static_cast(*pad_value_ptr); - PadImpl(op_params, input_shape, input_data, &converted_pad_value, - output_shape, output_data); -} - -// This version avoids conflicting template matching. 
-template <> -inline void Pad(const tflite::PadParams& op_params, - const RuntimeShape& input_shape, const int32_t* input_data, - const int32_t* pad_value_ptr, const RuntimeShape& output_shape, - int32_t* output_data) { - PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape, - output_data); -} - -template -inline void PadImageStyle(const tflite::PadParams& op_params, - const RuntimeShape& input_shape, const T* input_data, - const P* pad_value_ptr, - const RuntimeShape& output_shape, T* output_data) { - Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape, - output_data); -} - -template -inline void PadImageStyle(const tflite::PadParams& op_params, - const RuntimeShape& input_shape, - const float* input_data, const P* pad_value_ptr, - const RuntimeShape& output_shape, - float* output_data) { - Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape, - output_data); -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pooling.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pooling.h deleted file mode 100644 index fe17484c..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/pooling.h +++ /dev/null @@ -1,303 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -inline bool AveragePool(const PoolParams& params, - const RuntimeShape& input_shape, - const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region clamped so as to - // ensure that the filter window fits in the input array. 
- const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - float total = 0.f; - float filter_count = 0; - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - total += - input_data[Offset(input_shape, batch, in_y, in_x, channel)]; - filter_count++; - } - } - if (filter_count == 0) return false; - const float average = total / filter_count; - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - ActivationFunctionWithMinMax(average, params.float_activation_min, - params.float_activation_max); - } - } - } - } - return true; -} - -inline bool AveragePool(const PoolParams& params, - const RuntimeShape& input_shape, - const uint8_t* input_data, - const RuntimeShape& output_shape, - uint8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int 
in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region clamped so as to - // ensure that the filter window fits in the input array. - const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - int32_t acc = 0; - int filter_count = 0; - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - acc += - input_data[Offset(input_shape, batch, in_y, in_x, channel)]; - filter_count++; - } - } - if (filter_count == 0) return false; - acc = (acc + filter_count / 2) / filter_count; - acc = std::max(acc, params.quantized_activation_min); - acc = std::min(acc, params.quantized_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(acc); - } - } - } - } - return true; -} - -inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& output_shape, - float* output_data) { - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; 
batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region clamped so as to - // ensure that the filter window fits in the input array. - const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - float sum_squares = 0.f; - int filter_count = 0; - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - const float val = - input_data[Offset(input_shape, batch, in_y, in_x, channel)]; - sum_squares += val * val; - filter_count++; - } - } - const float l2pool_result = std::sqrt(sum_squares / filter_count); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - ActivationFunctionWithMinMax(l2pool_result, - params.float_activation_min, - params.float_activation_max); - } - } - } - } -} - -inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& output_shape, - float* output_data) { - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int 
output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region clamped so as to - // ensure that the filter window fits in the input array. - const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - float max = std::numeric_limits::lowest(); - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - max = std::max( - max, - input_data[Offset(input_shape, batch, in_y, in_x, channel)]); - } - } - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - ActivationFunctionWithMinMax(max, params.float_activation_min, - params.float_activation_max); - } - } - } - } -} - -inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& output_shape, - uint8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - TFLITE_DCHECK_GE(params.quantized_activation_min, 0); - TFLITE_DCHECK_LE(params.quantized_activation_max, 255); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - 
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int stride_height = params.stride_height; - const int stride_width = params.stride_width; - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int channel = 0; channel < depth; ++channel) { - const int in_x_origin = - (out_x * stride_width) - params.padding_values.width; - const int in_y_origin = - (out_y * stride_height) - params.padding_values.height; - // Compute the boundaries of the filter region clamped so as to - // ensure that the filter window fits in the input array. - const int filter_x_start = std::max(0, -in_x_origin); - const int filter_x_end = - std::min(params.filter_width, input_width - in_x_origin); - const int filter_y_start = std::max(0, -in_y_origin); - const int filter_y_end = - std::min(params.filter_height, input_height - in_y_origin); - uint8_t max = 0; - for (int filter_y = filter_y_start; filter_y < filter_y_end; - ++filter_y) { - for (int filter_x = filter_x_start; filter_x < filter_x_end; - ++filter_x) { - const int in_x = in_x_origin + filter_x; - const int in_y = in_y_origin + filter_y; - max = std::max( - max, - input_data[Offset(input_shape, batch, in_y, in_x, channel)]); - } - } - max = std::max(max, params.quantized_activation_min); - max = std::min(max, params.quantized_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(max); - } - } - } - } -} -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_ diff --git 
a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc deleted file mode 100644 index 4684be64..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc +++ /dev/null @@ -1,809 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include -#include -#include -#include -#include -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h" - -#if defined(_MSC_VER) -#define __restrict__ __restrict -#endif - -namespace tflite { -namespace tensor_utils { - -namespace { -const int32_t kInt16Max = std::numeric_limits::max(); -const int32_t kInt16Min = std::numeric_limits::min(); -} // namespace - -void PortableSymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* min_value, - float* max_value, float* scaling_factor) { - auto minmax = std::minmax_element(values, values + size); - *min_value = *minmax.first; - *max_value = *minmax.second; - - PortableSymmetricQuantizeFloats(values, 
size, quantized_values, *min_value, - *max_value, scaling_factor); -} - -void PortableSymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float min_value, - float max_value, float* scaling_factor) { - const int32_t kScale = 127; - const float range = std::max(std::abs(min_value), std::abs(max_value)); - if (range == 0) { - memset(quantized_values, 0, size * sizeof(int8_t)); - *scaling_factor = 1; - return; - } - *scaling_factor = range / kScale; - const float scaling_factor_inv = kScale / range; - for (int i = 0; i < size; ++i) { - const int32_t quantized_value = - static_cast(TfLiteRound(values[i] * scaling_factor_inv)); - // Clamp: just in case some odd numeric offset. - quantized_values[i] = static_cast( - std::min(kScale, std::max(-kScale, quantized_value))); - } -} - -void PortableAsymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, - float* scaling_factor, int32_t* offset) { - const int32_t kMinScale = -128; - const int32_t kMaxScale = 127; - const double qmin_double = kMinScale; - const double qmax_double = kMaxScale; - const auto minmax = std::minmax_element(values, values + size); - const double rmin = static_cast(std::min(0.0f, *minmax.first)); - const double rmax = static_cast(std::max(0.0f, *minmax.second)); - if (rmin == rmax) { - memset(quantized_values, 0, size * sizeof(int8_t)); - *scaling_factor = 1; - *offset = 0; - return; - } else { - double scale = (rmax - rmin) / (qmax_double - qmin_double); - const double zero_point_from_min = qmin_double - rmin / scale; - const double zero_point_from_max = qmax_double - rmax / scale; - const double zero_point_from_min_error = - std::abs(qmin_double) + std::abs(rmin / scale); - const double zero_point_from_max_error = - std::abs(qmax_double) + std::abs(rmax / scale); - const double zero_point_double = - zero_point_from_min_error < zero_point_from_max_error - ? 
zero_point_from_min - : zero_point_from_max; - int8_t nudged_zero_point = 0; - if (zero_point_double <= qmin_double) { - nudged_zero_point = kMinScale; - } else if (zero_point_double >= qmax_double) { - nudged_zero_point = kMaxScale; - } else { - nudged_zero_point = static_cast(round(zero_point_double)); - } - *scaling_factor = scale; - *offset = nudged_zero_point; - } - const float scaling_factor_inv = 1.0f / *scaling_factor; - for (int i = 0; i < size; ++i) { - const int32_t quantized_value = static_cast( - TfLiteRound(*offset + values[i] * scaling_factor_inv)); - quantized_values[i] = - std::min(kMaxScale, std::max(kMinScale, quantized_value)); - } -} - -void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, - int m_rows, int m_cols, - const float* vector, - int n_batch, float* result) { - float* result_in_batch = result; - for (int b = 0; b < n_batch; b++) { - const float* matrix_ptr = matrix; - for (int r = 0; r < m_rows; r++) { - float dot_prod = 0.0f; - const float* vector_in_batch = vector + b * m_cols; - for (int c = 0; c < m_cols; c++) { - dot_prod += *matrix_ptr++ * *vector_in_batch++; - } - *result_in_batch += dot_prod; - ++result_in_batch; - } - } -} - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result) { - for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) { - const float batch_scaling_factor = scaling_factors[batch]; - // Get the address of the first row. - const int8_t* row_ptr = matrix; - for (int row = 0; row < m_rows; ++row) { - // Initialize the dot product sum for the row to 0. - int32_t dotprod = 0; -#if defined(__GNUC__) - // Prefetch the row to cache. 
- __builtin_prefetch(row_ptr, 0 /* prefetch for read */, - 3 /* temporal locality */); -#endif - for (int col = 0; col < m_cols; ++col, ++row_ptr) { - dotprod += (*row_ptr) * (vectors[col]); - } // for col - *result += dotprod * batch_scaling_factor; - ++result; - } // for row - } // for batch -} - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result, const float* per_channel_scale, - const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, - bool* compute_row_sums, CpuBackendContext* context) { - if (input_offset == nullptr) { - PortableMatrixBatchVectorMultiplyAccumulate( - matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result); - return; - } - if (!compute_row_sums || *compute_row_sums) { - PortableReductionSumVector(matrix, row_sums, m_rows, m_cols); - if (compute_row_sums) { - *compute_row_sums = false; - } - } - - for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) { - const float batch_scaling_factor = scaling_factors[batch]; - const int32_t batch_offset = input_offset[batch]; - const int8_t* row_ptr = matrix; - for (int row = 0; row < m_rows; ++row) { - int32_t dotprod = 0; - float scale = batch_scaling_factor; - if (per_channel_scale) { - scale *= per_channel_scale[row]; - } -#if defined(__GNUC__) - // Prefetch the row to cache. 
- __builtin_prefetch(row_ptr, 0 /* prefetch for read */, - 3 /* temporal locality */); -#endif - for (int col = 0; col < m_cols; ++col, ++row_ptr) { - dotprod += (*row_ptr) * vectors[col]; - } // for col - dotprod -= row_sums[row] * batch_offset; - *result += dotprod * scale; - ++result; - } // for row - } // for batch -} - -void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4( - const float* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const float* __restrict__ vector, int n_batch, float* __restrict__ result) { - const int kBlockSize = 4; - TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0); - for (int batch = 0; batch < n_batch; batch++) { - const float* matrix_ptr = matrix; - for (int row = 0; row < m_rows; row++) { - float dot_prod = 0.0f; - const float* vector_in_batch = vector + batch * m_cols; - for (int i = segments[row]; i < segments[row + 1]; i++) { - const int block_start_index = indices[i] * kBlockSize; - const float* vector_block_in_batch_ptr = - vector_in_batch + block_start_index; - for (int c = 0; c < kBlockSize; c++) { - dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++; - } - } - result[batch * m_rows + row] += dot_prod; - } - } -} - -void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16( - const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, - int n_batch, const int32_t input_offset, const int32_t output_multiplier, - const int32_t output_shift, const int32_t output_offset, - const int32_t output_activation_min, const int32_t output_activation_max, - int8_t* __restrict__ result) { - const int kBlockSize = 16; - TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0); - for (int batch = 0; batch < n_batch; ++batch) { - const int8_t* matrix_ptr = matrix; - for (int row = 0; row < m_rows; ++row) { - int32_t dot_prod = 0; 
- const int8_t* vector_in_batch = vector + batch * m_cols; - for (int i = segments[row]; i < segments[row + 1]; ++i) { - const int block_start_index = indices[i] * kBlockSize; - const int8_t* vector_block_in_batch_ptr = - vector_in_batch + block_start_index; - for (int c = 0; c < kBlockSize; c++) { - dot_prod += *matrix_ptr * *vector_block_in_batch_ptr++; - dot_prod += *matrix_ptr++ * input_offset; - } - } - const int32_t bias_value = bias_vector != nullptr ? bias_vector[row] : 0; - dot_prod = MultiplyByQuantizedMultiplier(dot_prod + bias_value, - output_multiplier, output_shift); - dot_prod += output_offset; - result[batch * m_rows + row] = - static_cast(ActivationFunctionWithMinMax( - dot_prod, output_activation_min, output_activation_max)); - } - } -} - -void PortableSparseMatrixBatchVectorMultiplyAccumulate( - const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, - int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, - float* __restrict__ result) { - const int kBlockSize = 16; - TFLITE_DCHECK_EQ( // NOLINT - m_cols % kBlockSize, 0); - for (int batch = 0; batch < n_batch; batch++) { - const float* matrix_ptr = matrix; - const uint8_t* ledger_ptr = ledger; - for (int row = 0; row < m_rows; row++) { - float dot_prod = 0.0f; - int num_nonzero_blocks = *ledger_ptr++; - if (num_nonzero_blocks > 0) { - const float* vector_in_batch = vector + batch * m_cols; - for (int i = 0; i < num_nonzero_blocks; i++) { - const int block_start_index = *ledger_ptr++ * kBlockSize; - const float* vector_block_in_batch_ptr = - vector_in_batch + block_start_index; - for (int c = 0; c < kBlockSize; c++) { - dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++; - } - } - } - result[batch * m_rows + row] += dot_prod; - } - } -} - -void PortableSparseMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows, - const int m_cols, const int8_t* __restrict__ vectors, - const float* scaling_factors, int 
n_batch, float* __restrict__ result) { - static const int kBlockSize = 16; - TFLITE_DCHECK_EQ( // NOLINT - m_cols % kBlockSize, 0); - for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) { - const float batch_scaling_factor = scaling_factors[batch]; - const uint8_t* ledger_ptr = ledger; - // Get the address of the first row. - const int8_t* row_ptr = matrix; - for (int row = 0; row < m_rows; ++row) { - // Initialize the dot product sum for the row to 0. - int32_t dotprod = 0; -#if defined(__GNUC__) - // Prefetch the row to cache. - __builtin_prefetch(row_ptr, 0 /* prefetch for read */, - 3 /* temporal locality */); -#endif - int num_nonzero_blocks = *ledger_ptr++; - for (int i = 0; i < num_nonzero_blocks; i++) { - const int block_start_index = *ledger_ptr++ * kBlockSize; - const int8_t* vector_block_ptr = vectors + block_start_index; - for (int c = 0; c < kBlockSize; c++) { - dotprod += (*row_ptr++) * (*vector_block_ptr++); - } // for block - } // for num_nonzero_blocks - result[batch * m_rows + row] += dotprod * batch_scaling_factor; - } // for row - } // for batch -} - -template -void PortableMatrixBatchVectorMultiplyAccumulateImpl( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - T* output) { - const int16_t output_max = std::numeric_limits::max(); - const int16_t output_min = std::numeric_limits::min(); - for (int batch = 0; batch < n_batch; ++batch) { - for (int row = 0; row < n_output; ++row) { - int32_t acc = bias[row]; - for (int col = 0; col < n_input; ++col) { - int8_t input_val = input[batch * n_input + col]; - int8_t weights_val = input_to_gate_weights[row * n_input + col]; - acc += input_val * weights_val; - } - acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); - acc += output_zp; - acc += output[batch * n_output + row]; - if (acc > output_max) { - acc = output_max; - } - if (acc < 
output_min) { - acc = output_min; - } - output[batch * n_output + row] = static_cast(acc); - } - } -} - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int16_t* output, CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulateImpl( - input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input, - n_output, output_zp, output); -} - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int8_t* output, CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulateImpl( - input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input, - n_output, output_zp, output); -} - -void PortableMatrixBatchVectorMultiply(const int8_t* input, - int32_t input_zeropoint, - const int8_t* input_to_gate_weights, - int32_t input_to_gate_effective_scale_a, - int32_t input_to_gate_effective_scale_b, - int32_t n_batch, int32_t n_input, - int32_t n_cell, int8_t* gate_output, - int8_t gate_output_zp) { - const int32_t int8_max = std::numeric_limits::max(); - const int32_t int8_min = std::numeric_limits::min(); - for (int batch = 0; batch < n_batch; ++batch) { - for (int row = 0; row < n_cell; ++row) { - int32_t acc = 0; - for (int col = 0; col < n_input; ++col) { - int32_t input_val = input[batch * n_input + col]; - int8_t weights_val = input_to_gate_weights[row * n_input + col]; - acc += (input_val - input_zeropoint) * weights_val; - } - acc = MultiplyByQuantizedMultiplier(acc, input_to_gate_effective_scale_a, - input_to_gate_effective_scale_b); - acc += gate_output_zp; - if (acc > int8_max) { - acc = int8_max; - } - if (acc < 
int8_min) { - acc = int8_min; - } - gate_output[batch * n_cell + row] = static_cast(acc); - } - } -} - -void PortableMatrixBatchVectorMultiply( - const int16_t* hidden, const int8_t* hidden_to_output_weights, - int32_t proj_effective_scale_a, int32_t proj_effective_scale_b, - const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden, - int32_t n_output, int32_t output_zp, int8_t* proj_output) { - const int16_t int8_max = std::numeric_limits::max(); - const int16_t int8_min = std::numeric_limits::min(); - for (int batch = 0; batch < n_batch; ++batch) { - for (int row = 0; row < n_output; ++row) { - int64_t acc = gate_bias[row]; - for (int col = 0; col < n_hidden; ++col) { - int16_t input_val = hidden[batch * n_hidden + col]; - int8_t weights_val = hidden_to_output_weights[row * n_hidden + col]; - int64_t curr = acc; - acc += input_val * weights_val; - if (input_val * weights_val > 0 && acc < curr) { - acc = std::numeric_limits::max(); - } - if (input_val * weights_val < 0 && acc > curr) { - acc = std::numeric_limits::min(); - } - } - acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a, - proj_effective_scale_b); - acc += output_zp; - if (acc > int8_max) { - acc = int8_max; - } - if (acc < int8_min) { - acc = int8_min; - } - proj_output[batch * n_output + row] = acc; - } - } -} - -void PortableApplyLayerNorm(const int16_t* input, - const int16_t* layer_norm_weights, - const int32_t* bias, int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, int32_t variance_limit, - int n_batch, int n_input, int16_t* output) { - // The square of std::pow(2, 10), which is the extra factor that makes sure - // normalized values has enough resolution. 
- static const int kTwoToPower20 = 1 << 20; - for (int i = 0; i < n_batch; ++i) { - int64_t sum = 0; - int64_t sum_sq = 0; - for (int j = 0; j < n_input; ++j) { - const int32_t index = i * n_input + j; - int32_t val = static_cast(input[index]); - sum += val; - sum_sq += val * val; - } - int32_t mean = - static_cast(static_cast(sum) * 1024 / n_input); - // TODO(b/173994730): Avoids overflow but only works for POT n_input. - int32_t temp = kTwoToPower20 / n_input; - int64_t variance = - sum_sq * temp - static_cast(mean) * static_cast(mean); - int32_t variance2 = static_cast(variance / kTwoToPower20); - if (variance2 < 1) { - variance2 = variance_limit; - } - int32_t stddev_inverse_a; - int stddev_inverse_b; - GetInvSqrtQuantizedMultiplierExp(variance2, /*reverse_shift*/ -1, - &stddev_inverse_a, &stddev_inverse_b); - - for (int j = 0; j < n_input; ++j) { - const int32_t index = i * n_input + j; - int32_t val = static_cast(input[index]); - int32_t shifted = 1024 * val - mean; - int32_t rescaled = MultiplyByQuantizedMultiplier( - shifted, stddev_inverse_a, stddev_inverse_b); - // TODO(jianlijianli): Saturate this. - int64_t val3 = rescaled * layer_norm_weights[j] + bias[j]; - int32_t val4 = - static_cast((val3 > 0 ? 
val3 + 512 : val3 - 512) / 1024); - int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a, - layer_norm_scale_b + 12); - val5 = std::min(std::max(kInt16Min, val5), kInt16Max); - output[index] = static_cast(val5); - } - } -} - -void PortableApplyLayerNormFloat(const int16_t* input, - const int16_t* layer_norm_weights, - int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, - const int32_t* bias, int n_batch, int n_input, - int16_t* output) { - const int32_t int16_max = std::numeric_limits::max(); - const int32_t int16_min = std::numeric_limits::min(); - const float layer_norm_scale = - layer_norm_scale_a * - std::pow(2.0, static_cast(layer_norm_scale_b - 31)); - const float bias_scale = - static_cast(std::pow(2.0, -10)) * layer_norm_scale; - - for (int batch = 0; batch < n_batch; ++batch) { - float sum = 0.0f; - float sum_sq = 0.0f; - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - const float value = static_cast(input[index]); - sum += value; - sum_sq += value * value; - } - const float mean = sum / n_input; - float stddev_inv = 0.0f; - const float variance = sum_sq / n_input - mean * mean; - if (variance == 0) { - stddev_inv = 1.0f / std::sqrt(1e-8f); - } else { - stddev_inv = 1.0f / std::sqrt(variance); - } - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - const float normalized_value = - (static_cast(input[index]) - mean) * stddev_inv; - const float weighted_normalized_value = - normalized_value * layer_norm_weights[i] * layer_norm_scale + - bias[i] * bias_scale; - const int32_t quant_output = static_cast(round( - weighted_normalized_value * static_cast(std::pow(2, 12)))); - output[index] = std::min(int16_max, std::max(int16_min, quant_output)); - } - } -} - -void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix, - int32_t scalar, int32_t n_row, - int32_t n_col, int32_t* output) { - for (int i = 0; i < n_row; ++i) { - int32_t row_sum = 0; - for (int j = 0; j < n_col; 
++j) { - row_sum += *matrix++; - } - output[i] += row_sum * scalar; - } -} - -void PortableApplySigmoid(const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output) { - for (int batch = 0; batch < n_batch; ++batch) { - for (int c = 0; c < n_input; c++) { - using F3 = gemmlowp::FixedPoint; - using F0 = gemmlowp::FixedPoint; - const int index = batch * n_input + c; - F3 sigmoid_input = F3::FromRaw(input[index]); - F0 sigmoid_output = gemmlowp::logistic(sigmoid_input); - output[index] = sigmoid_output.raw(); - } - } -} - -void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output) { - const int32_t int16_max = std::numeric_limits::max(); - const int32_t int16_min = std::numeric_limits::min(); - for (int batch = 0; batch < n_batch; ++batch) { - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - const float float_input = - input[index] * static_cast(std::pow(2, -12)); - const float float_output = 1.0f / (1.0f + std::exp(-float_input)); - const int32_t quant_output = static_cast( - float_output * static_cast(std::pow(2, 15))); - const int32_t quant_output_clamped = - std::min(int16_max, std::max(int16_min, quant_output)); - output[index] = static_cast(quant_output_clamped); - } - } -} - -template -void PortableApplyTanhImpl(const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output) { - using FX = gemmlowp::FixedPoint; - using F0 = gemmlowp::FixedPoint; - for (int batch = 0; batch < n_batch; ++batch) { - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - FX tanh_input = FX::FromRaw(input[index]); - F0 tanh_output = gemmlowp::tanh(tanh_input); - output[index] = tanh_output.raw(); - } - } -} - -void PortableApplyTanh(int32_t integer_bits, const int16_t* input, - int32_t n_batch, int32_t n_input, int16_t* output) { - assert(integer_bits <= 6); -#define DISPATCH_TANH(i) \ - case i: \ - PortableApplyTanhImpl(input, n_batch, n_input, output); \ - 
break; - switch (integer_bits) { - DISPATCH_TANH(0); - DISPATCH_TANH(1); - DISPATCH_TANH(2); - DISPATCH_TANH(3); - DISPATCH_TANH(4); - DISPATCH_TANH(5); - DISPATCH_TANH(6); - default: - return; - } -#undef DISPATCH_TANH -} - -void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch, - int32_t n_input, int32_t integer_bits, - int16_t* output) { - const int32_t int16_max = std::numeric_limits::max(); - const int32_t int16_min = std::numeric_limits::min(); - const double two = 2.0; - for (int batch = 0; batch < n_batch; ++batch) { - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - const float float_input = - input[index] * std::pow(two, static_cast(integer_bits)); - const float float_output = std::tanh(float_input); - const int32_t quant_output = static_cast( - float_output * static_cast(std::pow(2, 15))); - const int32_t quant_output_clamped = - std::min(int16_max, std::max(int16_min, quant_output)); - output[index] = static_cast(quant_output_clamped); - } - } -} - -void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, - int n_batch, int n_input, int shift, int16_t* output) { - for (int batch = 0; batch < n_batch; ++batch) { - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - const int16_t a = input_1[index]; - const int16_t b = input_2[index]; - const int32_t value = static_cast(a) * static_cast(b); - output[index] = - static_cast(gemmlowp::RoundingDivideByPOT(value, shift)); - } - } -} - -void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, - int32_t multiplier, int32_t shift, int32_t n_batch, - int32_t n_input, int32_t output_zp, int8_t* output) { - for (int batch = 0; batch < n_batch; ++batch) { - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - const int16_t a = input_1[index]; - const int16_t b = input_2[index]; - int32_t value = static_cast(a) * static_cast(b); - value = MultiplyByQuantizedMultiplier(value, multiplier, shift); - 
value -= output_zp; - value = std::min(std::max(static_cast(-128), value), - static_cast(127)); - - output[index] = static_cast(value); - } - } -} - -void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2, - int n_batch, int n_input, int16_t* output) { - for (int batch = 0; batch < n_batch; ++batch) { - for (int i = 0; i < n_input; ++i) { - const int index = batch * n_input + i; - int32_t sum = input_1[index] + input_2[index]; - const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum)); - output[index] = static_cast(sum_clamped); - } - } -} - -float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, - int v_size) { - float result = 0.0; - for (int v = 0; v < v_size; v++) { - result += *vector1++ * *vector2++; - } - return result; -} - -namespace { -inline int32_t VectorVectorDotProduct(const int16_t* vector1, - const int16_t* vector2, int v_size) { - int32_t result = 0; - for (int v = 0; v < v_size; v++) { - result += *vector1++ * *vector2++; - } - return result; -} -} // namespace - -void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1, - const int16_t* vector2, - int v_size, int n_batch, - int32_t* result) { - for (int b = 0; b < n_batch; b++) { - result[b] = VectorVectorDotProduct(vector1, vector2, v_size); - vector1 += v_size; - vector2 += v_size; - } -} - -void PortableVectorBatchVectorCwiseProductAccumulate( - const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, - int32_t multiplier, int shift, int16_t* result) { - for (int b = 0; b < n_batch; b++) { - for (int v = 0; v < v_size; v++) { - int32_t prod = vector[v] * *batch_vector++; - prod = MultiplyByQuantizedMultiplier(prod, multiplier, shift); - int32_t output = prod + *result; - output = std::max(std::min(static_cast(32767), output), - static_cast(-32768)); - *result++ = output; - } - } -} - -void PortableSub1Vector(const float* vector, int v_size, float* result) { - for (int v = 0; v < v_size; v++) { - *result++ 
= 1.0f - *vector++; - } -} - -void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result) { - static const int16_t kOne = 32767; - for (int v = 0; v < v_size; v++) { - *result++ = kOne - *vector++; - } -} - -void PortableVectorScalarMultiply(const int8_t* vector, const int v_size, - const float scale, float* result) { - for (int v = 0; v < v_size; ++v) { - *result++ = scale * *vector++; - } -} - -void PortableMeanStddevNormalization(const float* __restrict__ input_vector, - float* __restrict__ output_vector, - int v_size, int n_batch) { - for (int batch = 0; batch < n_batch; ++batch) { - float sum = 0.0f; - for (int i = 0; i < v_size; ++i) { - sum += input_vector[i]; - } - const float mean = sum / v_size; - float sum_diff_sq = 0.0f; - for (int i = 0; i < v_size; ++i) { - const float diff = input_vector[i] - mean; - sum_diff_sq += diff * diff; - } - const float variance = sum_diff_sq / v_size; - constexpr float kNormalizationConstant = 1e-8f; - const float stddev_inv = - 1.0f / std::sqrt(variance + kNormalizationConstant); - for (int i = 0; i < v_size; ++i) { - output_vector[i] = (input_vector[i] - mean) * stddev_inv; - } - input_vector += v_size; - output_vector += v_size; - } -} - -void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, - const int8_t* recurrent, int8_t recurrent_zp, - int32_t input_effective_scale_a, - int32_t input_effective_scale_b, - int32_t recurrent_effective_scale_a, - int32_t recurrent_effective_scale_b, - int32_t n_batch, int32_t n_cell, - int16_t* output) { - const int32_t int16_max = std::numeric_limits::max(); - const int32_t int16_min = std::numeric_limits::min(); - for (int i = 0; i < n_batch * n_cell; ++i) { - int32_t x = static_cast(input[i]) - static_cast(input_zp); - int32_t h = - static_cast(recurrent[i]) - static_cast(recurrent_zp); - int32_t x_scaled = MultiplyByQuantizedMultiplier(x, input_effective_scale_a, - input_effective_scale_b); - int32_t h_scaled = MultiplyByQuantizedMultiplier( - h, 
recurrent_effective_scale_a, recurrent_effective_scale_b); - int32_t y = h_scaled + x_scaled; - if (y > int16_max) { - y = int16_max; - } - if (y < int16_min) { - y = int16_min; - } - output[i] = static_cast(y); - } -} - -} // namespace tensor_utils -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h deleted file mode 100644 index 0416db09..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +++ /dev/null @@ -1,333 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ - -#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h" - -#if defined(_MSC_VER) -#define __restrict__ __restrict -#endif - -namespace tflite { -namespace tensor_utils { - -// Check if all entries of a vector are zero for float. -bool IsZeroVector(const float* vector, int v_size) { - return PortableIsZeroVector(vector, v_size); -} - -// Check if all entries of a vector are zero for int8_t. 
-bool IsZeroVector(const int8_t* vector, int v_size) { - return PortableIsZeroVector(vector, v_size); -} - -void SymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* min, float* max, - float* scaling_factor) { - PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max, - scaling_factor); -} - -void SymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float min_value, - float max_value, float* scaling_factor) { - PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value, - max_value, scaling_factor); -} - -void AsymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* scaling_factor, - int32_t* offset) { - PortableAsymmetricQuantizeFloats(values, size, quantized_values, - scaling_factor, offset); -} - -void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, - int m_cols, const float* vector, - int n_batch, float* result) { - PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, - n_batch, result); -} - -void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix, - const int m_rows, const int m_cols, - const int8_t* __restrict__ vector, - const float* scaling_factors, - int n_batch, - float* __restrict__ result) { - PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, - scaling_factors, n_batch, result); -} - -void MatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result, const float* per_channel_scale, - const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, - bool* compute_row_sums, CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulate( - matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result, - per_channel_scale, input_offset, scratch, 
row_sums, compute_row_sums, - context); -} - -void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix, - const int m_rows, const int m_cols, - const int8_t* __restrict__ vector, - const float* scaling_factors, - int n_batch, int32_t* scratch, - float* __restrict__ result, - CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, - scaling_factors, n_batch, result); -} - -void SparseMatrixBatchVectorMultiplyAccumulate1x4( - const float* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const float* __restrict__ vector, int n_batch, float* __restrict__ result) { - PortableSparseMatrixBatchVectorMultiplyAccumulate1x4( - matrix, segments, indices, m_rows, m_cols, vector, n_batch, result); -} - -void SparseMatrixBatchVectorMultiplyAccumulate( - const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, - int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, - float* __restrict__ result) { - PortableSparseMatrixBatchVectorMultiplyAccumulate( - matrix, ledger, m_rows, m_cols, vector, n_batch, result); -} - -void SparseMatrixBatchVectorMultiplyAccumulate1x16( - const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, - int n_batch, const int32_t input_offset, const int32_t output_multiplier, - const int32_t output_shift, const int32_t output_offset, - const int32_t output_activation_min, const int32_t output_activation_max, - - int8_t* __restrict__ result) { - PortableSparseMatrixBatchVectorMultiplyAccumulate1x16( - matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch, - input_offset, output_multiplier, output_shift, output_offset, - output_activation_min, output_activation_max, result); -} - -void 
SparseMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows, - const int m_cols, const int8_t* __restrict__ vectors, - const float* scaling_factors, int n_batch, float* __restrict__ result) { - PortableSparseMatrixBatchVectorMultiplyAccumulate( - matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch, - result); -} - -void MatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int16_t* output, CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulate( - input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input, - n_output, output_zp, scratch, output, context); -} - -void MatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int8_t* output, CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulate( - input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input, - n_output, output_zp, scratch, output, context); -} - -void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar, - int32_t n_row, int32_t n_col, - int32_t* output) { - PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output); -} - -void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint, - const int8_t* input_to_gate_weights, - int32_t input_to_gate_effective_scale_a, - int32_t input_to_gate_effective_scale_b, - int32_t n_batch, int32_t n_input, int32_t n_cell, - int8_t* gate_output, int8_t gate_output_zp) { - PortableMatrixBatchVectorMultiply( - input, input_zeropoint, input_to_gate_weights, - input_to_gate_effective_scale_a, 
input_to_gate_effective_scale_b, n_batch, - n_input, n_cell, gate_output, gate_output_zp); -} - -void MatrixBatchVectorMultiply(const int16_t* hidden, - const int8_t* hidden_to_output_weights, - int32_t proj_effective_scale_a, - int32_t proj_effective_scale_b, - const int32_t* gate_bias, int32_t n_batch, - int32_t n_hidden, int32_t n_output, - int32_t output_zp, int8_t* proj_output) { - PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights, - proj_effective_scale_a, - proj_effective_scale_b, gate_bias, n_batch, - n_hidden, n_output, output_zp, proj_output); -} - -void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights, - const int32_t* bias, int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, int32_t variance_limit, - int n_batch, int n_input, int16_t* output) { - PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a, - layer_norm_scale_b, variance_limit, n_batch, n_input, - output); -} - -void ApplyLayerNormFloat(const int16_t* input, - const int16_t* layer_norm_weights, - int32_t layer_norm_scale_a, int32_t layer_norm_scale_b, - const int32_t* bias, int n_batch, int n_input, - int16_t* output) { - PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a, - layer_norm_scale_b, bias, n_batch, n_input, - output); -} - -void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input, - int16_t* output) { - PortableApplySigmoid(input, n_batch, n_input, output); -} - -void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input, - int16_t* output) { - PortableApplySigmoidFloat(input, n_batch, n_input, output); -} - -void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output) { - PortableApplyTanh(integer_bits, input, n_batch, n_input, output); -} - -void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input, - int32_t integer_bits, int16_t* output) { - PortableApplyTanhFloat(input, n_batch, n_input, 
integer_bits, output); -} - -void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch, - int n_input, int shift, int16_t* output) { - PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output); -} - -void CwiseMul(const int16_t* input_1, const int16_t* input_2, - int32_t multiplier, int32_t shift, int32_t n_batch, - int32_t n_input, int32_t output_zp, int8_t* output) { - PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input, - output_zp, output); -} - -void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch, - int n_input, int16_t* output) { - PortableCwiseAdd(input_1, input_2, n_batch, n_input, output); -} - -void CwiseClipping(float* vector, const int v_size, - const float clipping_value) { - PortableCwiseClipping(vector, v_size, clipping_value); -} - -void CwiseClipping(int16_t* vector, const int v_size, - const int16_t clipping_value) { - PortableCwiseClipping(vector, v_size, clipping_value); -} - -void CwiseClipping(int8_t* vector, const int v_size, - const int8_t clipping_value) { - PortableCwiseClipping(vector, v_size, clipping_value); -} - -void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size, - const int16_t* batch_vector, - int n_batch, int32_t multiplier, - int shift, int16_t* result) { - PortableVectorBatchVectorCwiseProductAccumulate( - vector, v_size, batch_vector, n_batch, multiplier, shift, result); -} - -float VectorVectorDotProduct(const float* vector1, const float* vector2, - int v_size) { - return PortableVectorVectorDotProduct(vector1, vector2, v_size); -} - -void BatchVectorBatchVectorDotProduct(const int16_t* vector1, - const int16_t* vector2, int v_size, - int n_batch, int32_t* result) { - PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch, - result); -} - -void Sub1Vector(const float* vector, int v_size, float* result) { - PortableSub1Vector(vector, v_size, result); -} - -void Sub1Vector(const int16_t* vector, int v_size, int16_t* 
result) { - PortableSub1Vector(vector, v_size, result); -} - -// Multiply all elements of vector with a scalar. -void VectorScalarMultiply(const int8_t* vector, int v_size, float scale, - float* result) { - PortableVectorScalarMultiply(vector, v_size, scale, result); -} - -void ReductionSumVector(const float* input_vector, float* output_vector, - int output_size, int reduction_size) { - PortableReductionSumVector(input_vector, output_vector, output_size, - reduction_size); -} - -void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector, - int output_size, int reduction_size) { - PortableReductionSumVector(input_vector, output_vector, output_size, - reduction_size); -} - -void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector, - int output_size, int reduction_size) { - PortableReductionSumVector(input_vector, output_vector, output_size, - reduction_size); -} - -void MeanStddevNormalization(const float* input_vector, float* output_vector, - int v_size, int n_batch) { - PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch); -} - -void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, - const int8_t* recurrent, int8_t recurrent_zp, - int32_t input_effective_scale_a, - int32_t input_effective_scale_b, - int32_t recurrent_effective_scale_a, - int32_t recurrent_effective_scale_b, int32_t n_batch, - int32_t n_cell, int16_t* output) { - PortableTwoGateSaturatingAdd( - input, input_zp, recurrent, recurrent_zp, input_effective_scale_a, - input_effective_scale_b, recurrent_effective_scale_a, - recurrent_effective_scale_b, n_batch, n_cell, output); -} - -} // namespace tensor_utils -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h deleted file 
mode 100644 index 6c404d5e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h +++ /dev/null @@ -1,244 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_ - -#include -#include - -#if defined(_MSC_VER) -#define __restrict__ __restrict -#endif - -namespace tflite { - -// Not all backends support CpuBackendContext usage, so forward declare to avoid -// pulling in its implementation. 
-class CpuBackendContext; - -namespace tensor_utils { - -template -bool PortableIsZeroVector(const T* vector, int v_size) { - for (int i = 0; i < v_size; ++i) { - if (vector[i] != 0) { - return false; - } - } - return true; -} - -void PortableSymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* min_value, - float* max_value, float* scaling_factor); - -void PortableSymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float min_value, - float max_value, float* scaling_factor); - -void PortableAsymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, - float* scaling_factor, int32_t* offset); - -// Multiply a matrix by a batch vector, and store results in a batch-size -// vector. -void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, - int m_rows, int m_cols, - const float* vector, - int n_batch, float* result); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result, const float* per_channel_scale, - const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, - bool* compute_row_sums, CpuBackendContext* context); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vector, const float* scaling_factors, - int n_batch, int32_t* scratch, float* __restrict__ result, - CpuBackendContext* context); - -void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4( - const float* __restrict__ matrix, const int32_t* __restrict__ segments, 
- const int32_t* __restrict__ indices, int m_rows, int m_cols, - const float* __restrict__ vector, int n_batch, float* __restrict__ result); - -void PortableSparseMatrixBatchVectorMultiplyAccumulate( - const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, - int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, - float* __restrict__ result); - -void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16( - const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, - int n_batch, const int32_t input_offset, const int32_t output_multiplier, - const int32_t output_shift, const int32_t output_offset, - const int32_t output_activation_min, const int32_t output_activation_max, - int8_t* __restrict__ result); - -void PortableSparseMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows, - const int m_cols, const int8_t* __restrict__ vectors, - const float* scaling_factors, int n_batch, float* __restrict__ result); - -// Dot product of two vectors. 
-float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, - int v_size); - -void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1, - const int16_t* vector2, - int v_size, int n_batch, - int32_t* result); - -void PortableVectorBatchVectorCwiseProductAccumulate( - const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, - int32_t multiplier, int shift, int16_t* result); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int16_t* output, CpuBackendContext* context); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int8_t* output, CpuBackendContext* context); - -void PortableMatrixBatchVectorMultiply(const int8_t* input, - int32_t input_zeropoint, - const int8_t* input_to_gate_weights, - int32_t input_to_gate_effective_scale_a, - int32_t input_to_gate_effective_scale_b, - int32_t n_batch, int32_t n_input, - int32_t n_cell, int8_t* gate_output, - int8_t gate_output_zp); - -void PortableMatrixBatchVectorMultiply( - const int16_t* hidden, const int8_t* hidden_to_output_weights, - int32_t proj_effective_scale_a, int32_t proj_effective_scale_b, - const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden, - int32_t n_output, int32_t output_zp, int8_t* proj_output); - -void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix, - int32_t scalar, int32_t n_row, - int32_t n_col, int32_t* output); - -void PortableApplyLayerNorm(const int16_t* input, - const int16_t* layer_norm_weights, - const int32_t* bias, int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, int32_t variance_limit, - 
int n_batch, int n_input, int16_t* output); - -void PortableApplyLayerNormFloat(const int16_t* input, - const int16_t* layer_norm_weights, - int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, - const int32_t* bias, int n_batch, int n_input, - int16_t* output); - -void PortableApplySigmoid(const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output); - -void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output); - -void PortableApplyTanh(int32_t integer_bits, const int16_t* input, - int32_t n_batch, int32_t n_input, int16_t* output); - -void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch, - int32_t n_input, int32_t integer_bits, - int16_t* output); - -void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, - int n_batch, int n_input, int shift, int16_t* output); - -void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, - int32_t multiplier, int32_t shift, int32_t n_batch, - int32_t n_input, int32_t output_zp, int8_t* output); - -void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2, - int n_batch, int n_input, int16_t* output); - -template -void PortableCwiseClipping(T* vector, const int v_size, - const T& clipping_value) { - for (int i = 0; i < v_size; i++) { - vector[i] = std::max(std::min(clipping_value, vector[i]), - static_cast(-clipping_value)); - } -} - -// Batch vector initialization with another vector. -void PortableVectorBatchVectorAssign(const float* vector, int v_size, - int n_batch, float* batch_vector); - -// Compute "1.0f - elements of vector" (used in CIFG). -void PortableSub1Vector(const float* vector, int v_size, float* result); - -void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result); - -// Multiply all elements of vector with a scalar. 
-void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale, - float* result); - -// Reduce-sum on a vector: -// input_vector: pointer to input vector. -// output_vector: pointer to vector. -// output_size: output vector size. -// reduction_size: number of consecutive elements from input vector which are -// added to get one element of output. -template -void PortableReductionSumVector(const INPUT* input_vector, - OUTPUT* output_vector, int output_size, - int reduction_size) { - for (int o = 0; o < output_size; o++) { - OUTPUT result = 0; - for (int r = 0; r < reduction_size; r++) { - result += input_vector[r]; - } - output_vector[o] = result; - input_vector += reduction_size; - } -} - -// Layer norm for each batch. -void PortableMeanStddevNormalization(const float* __restrict__ input_vector, - float* __restrict__ output_vector, - int v_size, int n_batch); - -// Saturate Add. -void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, - const int8_t* recurrent, int8_t recurrent_zp, - int32_t input_effective_scale_a, - int32_t input_effective_scale_b, - int32_t recurrent_effective_scale_a, - int32_t recurrent_effective_scale_b, - int32_t n_batch, int32_t n_cell, - int16_t* output); - -} // namespace tensor_utils -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/prelu.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/prelu.h deleted file mode 100644 index aa9901d6..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/prelu.h +++ /dev/null @@ -1,111 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -// Broadcast prelu to output_shape for quantized uint8_t/int8_t data. -template -inline void BroadcastPrelu4DSlow( - const PreluParams& params, const RuntimeShape& input_shape, - const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data, - const RuntimeShape& output_shape, T* output_data) { - TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input_shape, alpha_shape, &desc1, &desc2); - - for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - int output_index = Offset(extended_output_shape, b, y, x, c); - int input_index = SubscriptToIndex(desc1, b, y, x, c); - const int32_t input_value = - params.input_offset + input_data[input_index]; - int32_t output_value; 
- if (input_value >= 0) { - output_value = MultiplyByQuantizedMultiplier( - input_value, params.output_multiplier_1, params.output_shift_1); - } else { - auto alpha_index = SubscriptToIndex(desc2, b, y, x, c); - const int32_t alpha_value = - params.alpha_offset + alpha_data[alpha_index]; - - output_value = MultiplyByQuantizedMultiplier( - input_value * alpha_value, params.output_multiplier_2, - params.output_shift_2); - } - output_value += params.output_offset; - - const int32_t quantized_min = std::numeric_limits::min(); - const int32_t quantized_max = std::numeric_limits::max(); - const int32_t clamped_output = - std::min(quantized_max, std::max(quantized_min, output_value)); - output_data[output_index] = static_cast(clamped_output); - } - } - } - } -} - -template -inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape, - const T* input_data, const RuntimeShape& alpha_shape, - const T* alpha_data, const RuntimeShape& output_shape, - T* output_data) { - const int32_t quantized_min = std::numeric_limits::min(); - const int32_t quantized_max = std::numeric_limits::max(); - - const int flat_size = - MatchingElementsSize(input_shape, alpha_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - const int32_t input_value = params.input_offset + input_data[i]; - int32_t output_value; - if (input_value >= 0) { - output_value = MultiplyByQuantizedMultiplier( - input_value, params.output_multiplier_1, params.output_shift_1); - } else { - const int32_t alpha_value = params.alpha_offset + alpha_data[i]; - - output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value, - params.output_multiplier_2, - params.output_shift_2); - } - output_value += params.output_offset; - - const int32_t clamped_output = - std::min(quantized_max, std::max(quantized_min, output_value)); - output_data[i] = static_cast(clamped_output); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // 
TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h deleted file mode 100644 index bda27693..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_ - -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -// Consolidates dimensions in broadcast inputs, checks for five-fold pattern. -// -// For example, if sequence of dimensions of one input is -// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ... -// we can consolidate these as -// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1. -// -// The category is updated in the less-frequent case of shapes that are -// not suited to a fivefold-loop broadcast. -// -// Falls back to generic pattern when it does not know how to process properly. 
-// -// Returns true iff there is some sort of broadcast, which includes five-fold -// patterns and falling back to generic broadcast. -inline bool ProcessBroadcastShapes(const RuntimeShape& shape0, - const RuntimeShape& shape1, - tflite::ArithmeticParams* params) { - const int dims_count = - std::max(shape0.DimensionsCount(), shape1.DimensionsCount()); - - params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast; - RuntimeShape scalar_shape(dims_count, 1); - - auto extended_shape0 = RuntimeShape::ExtendedShape(dims_count, shape0); - auto extended_shape1 = RuntimeShape::ExtendedShape(dims_count, shape1); - - // Check for "exact" match, implicitly accepting any scalar shapes. - if (extended_shape0 == extended_shape1) { - params->broadcast_category = BroadcastableOpCategory::kNonBroadcast; - return false; - } - - for (int i = dims_count - 1; i >= 0; --i) { - if (extended_shape0.Dims(i) == extended_shape1.Dims(i)) { - continue; - } else if (extended_shape0.Dims(i) == 1) { - params->broadcast_category = - BroadcastableOpCategory::kFirstInputBroadcastsFast; - break; - } else if (extended_shape1.Dims(i) == 1) { - params->broadcast_category = - BroadcastableOpCategory::kSecondInputBroadcastsFast; - break; - } else { - // This case is erroneous: there is a dimension that does not match and - // is not a broadcast from one shape to the other. - params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast; - return true; - } - } - - if (params->broadcast_category != - BroadcastableOpCategory::kFirstInputBroadcastsFast && - params->broadcast_category != - BroadcastableOpCategory::kSecondInputBroadcastsFast) { - // This is unreachable because at least one else clause in the above loop - // must be reached. 
- TFLITE_DCHECK(false); - params->broadcast_category = BroadcastableOpCategory::kNonBroadcast; - return false; - } - - // From this point it is assumed contractually that corresponding dimensions - // in shape0 and shape1 are either (a) equal or (b) one or other equals 1. - const bool swap_inputs = params->broadcast_category == - BroadcastableOpCategory::kSecondInputBroadcastsFast; - const RuntimeShape* shape_a = - swap_inputs ? &extended_shape1 : &extended_shape0; - const RuntimeShape* shape_b = - swap_inputs ? &extended_shape0 : &extended_shape1; - - int i = dims_count - 1; - params->broadcast_shape[0] = 1; - params->broadcast_shape[1] = 1; - params->broadcast_shape[2] = 1; - params->broadcast_shape[3] = 1; - params->broadcast_shape[4] = 1; - // y_0 is greedy: include dims if both or neither equal 1: in other words, - // test for equality rather than (shape_a->Dims(i) != 1). - while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) { - params->broadcast_shape[4] *= shape_b->Dims(i); - --i; - } - // Here either input_a or input_b has dim of 1 (if i >= 0). If it is input_b - // that has the unit dimension, the next two loops are not entered. - while (i >= 0 && shape_a->Dims(i) == 1) { - params->broadcast_shape[3] *= shape_b->Dims(i); - --i; - } - while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) { - params->broadcast_shape[2] *= shape_a->Dims(i); - --i; - } - // Here either input_a or input_b has dim of 1 (if i >= 0). - while (i >= 0 && shape_b->Dims(i) == 1) { - params->broadcast_shape[1] *= shape_a->Dims(i); - --i; - } - while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) { - params->broadcast_shape[0] *= shape_b->Dims(i); - --i; - } - - // Rarer case is when the broadcast dimensions cannot be handled by a fivefold - // loop. 
- if (i >= 0) { - params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast; - } - return true; -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/quantize.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/quantize.h deleted file mode 100644 index f304b641..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/quantize.h +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_ - -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -template -inline void AffineQuantize(const tflite::QuantizationParams& op_params, - const RuntimeShape& input_shape, - const InputT* input_data, - const RuntimeShape& output_shape, - OutputT* output_data) { - const int32_t zero_point = op_params.zero_point; - const double scale = op_params.scale; - const int flat_size = MatchingFlatSize(input_shape, output_shape); - static constexpr int32_t min_val = std::numeric_limits::min(); - static constexpr int32_t max_val = std::numeric_limits::max(); - - for (int i = 0; i < flat_size; i++) { - const InputT val = input_data[i]; - int32_t unclamped = - static_cast(TfLiteRound(val / static_cast(scale))) + - zero_point; - int32_t clamped = std::min(std::max(unclamped, min_val), max_val); - output_data[i] = clamped; - } -} - -// Quantizes per-channel. -template -inline void PerChannelQuantize( - const tflite::PerChannelQuantizationParams& op_params, - const RuntimeShape& input_shape, const InputT* input_data, - const RuntimeShape& output_shape, OutputT* output_data) { - // Ensure flat size is same. 
- MatchingFlatSize(input_shape, output_shape); - - const int32_t* zero_point = op_params.zero_point; - const float* scale = op_params.scale; - const int32_t quantized_dimension = op_params.quantized_dimension; - const int32_t num_dims = input_shape.DimensionsCount(); - const int32_t* dims_data = input_shape.DimsData(); - std::vector current_dim(num_dims, 0); - static constexpr int32_t min_val = std::numeric_limits::min(); - static constexpr int32_t max_val = std::numeric_limits::max(); - - do { - size_t offset = - ReducedOutputOffset(num_dims, reinterpret_cast(dims_data), - current_dim.data(), 0, nullptr); - const InputT val = input_data[offset]; - const int channel = current_dim[quantized_dimension]; - int32_t unclamped = static_cast(TfLiteRound( - val / static_cast(scale[channel]))) + - zero_point[channel]; - int32_t clamped = std::min(std::max(unclamped, min_val), max_val); - output_data[offset] = static_cast(clamped); - } while (NextIndex(num_dims, reinterpret_cast(dims_data), - current_dim.data())); -} - -} // namespace reference_ops - -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/reduce.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/reduce.h deleted file mode 100644 index 341b3a08..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/reduce.h +++ /dev/null @@ -1,526 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_ - -#include - -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/max.h" -#include "tensorflow/lite/kernels/internal/min.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/types.h" - -// Check if the reduction at index is the first one along the dimensions given -// in axis. -inline bool IsFirstReduction(const int* index, const int num_axis, - const int* axis) { - if (num_axis == 0) { - return true; - } - - TFLITE_DCHECK(index != nullptr); - TFLITE_DCHECK(axis != nullptr); - for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) { - if (index[axis[axis_idx]] != 0) { - return false; - } - } - - return true; -} - -namespace tflite { - -namespace reference_ops { - -// A generic reduce method that can be used for reduce_sum, reduce_mean, etc. -// This method iterates through input data and reduce elements along the -// dimensions given in axis. -template -inline bool Reduce(const In* input_data, const int* input_dims, - const int* output_dims, const int input_num_dims, - const int output_num_dims, const int* axis, - const int num_axis, int* input_iter, - Out reducer(Out current, const In in), Out* output_data) { - // Reset input iterator. - for (int idx = 0; idx < input_num_dims; ++idx) { - input_iter[idx] = 0; - } - // Iterate through input_data. 
- do { - size_t input_offset = - ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr); - size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, - input_iter, num_axis, axis); - output_data[output_offset] = - reducer(output_data[output_offset], input_data[input_offset]); - } while (NextIndex(input_num_dims, input_dims, input_iter)); - return true; -} - -// Similar to above Reduce function but takes two reducer functions. -// The 'reducer_first' is called with the first value of the reduction, -// 'reducer_next' is then called for all the others. -template -inline bool Reduce(const In* input_data, const int* input_dims, - const int* output_dims, const int input_num_dims, - const int output_num_dims, const int* axis, - const int num_axis, int* input_iter, - const std::function& reducer_first, - const std::function& reducer_next, - Out* output_data) { - // Reset input iterator. - for (int idx = 0; idx < input_num_dims; ++idx) { - input_iter[idx] = 0; - } - // Iterate through input_data. - do { - size_t input_offset = - ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr); - size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims, - input_iter, num_axis, axis); - if (IsFirstReduction(input_iter, num_axis, axis)) { - output_data[output_offset] = reducer_first(input_data[input_offset]); - } else { - output_data[output_offset] = - reducer_next(output_data[output_offset], input_data[input_offset]); - } - } while (NextIndex(input_num_dims, input_dims, input_iter)); - return true; -} - -// This method parses the input 'axis' to remove duplicates and handle negative -// values, and returns a valid 'out_axis' -inline bool ResolveAxis(const int num_dims, const int* axis, - const int64_t num_axis, int* out_axis, - int* out_num_axis) { - *out_num_axis = 0; // Just in case. - // Short-circuit axis resolution for scalars; the axis will go unused. 
- if (num_dims == 0) { - return true; - } - // o(n^2) is fine since out_num_axis should be really small, mostly <= 4 - for (int64_t idx = 0; idx < num_axis; ++idx) { - // Handle negative index. A positive index 'p_idx' can be represented as a - // negative index 'n_idx' as: n_idx = p_idx-num_dims - // eg: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1] */ - int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx]; - TFLITE_DCHECK(current >= 0 && current < num_dims); - if (current < 0 || current >= num_dims) { - return false; - } - bool is_dup = false; - for (int j = 0; j < *out_num_axis; ++j) { - if (out_axis[j] == current) { - is_dup = true; - break; - } - } - if (!is_dup) { - out_axis[*out_num_axis] = current; - *out_num_axis += 1; - } - } - return true; -} - -// This method expects that output_data has been initialized. -template -inline bool ReduceSumImpl(const In* input_data, const int* input_dims, - const int* output_dims, const int input_num_dims, - const int output_num_dims, const int* axis, - const int num_axis, int* input_iter, - Out* output_data) { - auto reducer = [](const Out current, const In in) -> Out { - const Out actual_in = static_cast(in); - return current + actual_in; - }; - return Reduce(input_data, input_dims, output_dims, input_num_dims, - output_num_dims, axis, num_axis, input_iter, reducer, - output_data); -} - -template -inline bool InitTensorDataForReduce(const int* dims, const int num_dims, - const T init_value, T* data) { - size_t num_elements = 1; - for (int idx = 0; idx < num_dims; ++idx) { - size_t current = static_cast(dims[idx]); - // Overflow prevention. - if (current > 0 && - num_elements > std::numeric_limits::max() / current) { - return false; - } - num_elements *= current; - } - for (size_t idx = 0; idx < num_elements; ++idx) { - data[idx] = init_value; - } - return true; -} - -// Computes the generic value (i.e., sum/max/min/prod) of elements across -// dimensions given in axis. 
It needs to pass in init_value and reducer. -template -inline bool ReduceGeneric(const T* input_data, const int* input_dims, - const int input_num_dims, T* output_data, - const int* output_dims, const int output_num_dims, - const int* axis, const int64_t num_axis_dimensions, - bool keep_dims, int* temp_index, int* resolved_axis, - T init_value, - T reducer(const T current, const T in)) { - // Reset output data. - if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value, - output_data)) { - return false; - } - - // Return early when input shape has zero dim. This is done after initializing - // data for output tensor because there are cases that the input tensor is - // empty but output tensor is not. In that case, output tensor should be - // filled with init_value. - for (int i = 0; i < input_num_dims; ++i) { - if (input_dims[i] == 0) return true; - } - - // Resolve axis. - int num_resolved_axis = 0; - if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis, - &num_resolved_axis)) { - return false; - } - - return Reduce(input_data, input_dims, output_dims, input_num_dims, - output_num_dims, resolved_axis, num_resolved_axis, - temp_index, reducer, output_data); -} - -// Computes the mean of elements across dimensions given in axis. -// It does so in two stages, first calculates the sum of elements along the axis -// then divides it by the number of element in axis. -template -inline bool Mean(const T* input_data, const int* input_dims, - const int input_num_dims, T* output_data, - const int* output_dims, const int output_num_dims, - const int* axis, const int num_axis_dimensions, bool keep_dims, - int* temp_index, int* resolved_axis, U* temp_sum) { - ruy::profiler::ScopeLabel label("Mean"); - // Reset output data. - size_t num_outputs = 1; - for (int idx = 0; idx < output_num_dims; ++idx) { - size_t current = static_cast(output_dims[idx]); - // Overflow prevention. 
- if (num_outputs > std::numeric_limits::max() / current) { - return false; - } - num_outputs *= current; - } - for (size_t idx = 0; idx < num_outputs; ++idx) { - output_data[idx] = T(); - temp_sum[idx] = U(); - } - - // Resolve axis. - int num_resolved_axis = 0; - if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis, - &num_resolved_axis)) { - return false; - } - - if (!ReduceSumImpl(input_data, input_dims, output_dims, input_num_dims, - output_num_dims, resolved_axis, num_resolved_axis, - temp_index, temp_sum)) { - return false; - } - - // Calculate mean by dividing output_data by num of aggregated element. - size_t num_elements_in_axis = 1; - for (int idx = 0; idx < num_resolved_axis; ++idx) { - size_t current = static_cast(input_dims[resolved_axis[idx]]); - // Overflow prevention. - if (current > (std::numeric_limits::max() / num_elements_in_axis)) { - return false; - } - num_elements_in_axis *= current; - } - - if (num_elements_in_axis > 0) { - for (size_t idx = 0; idx < num_outputs; ++idx) { - output_data[idx] = - static_cast(temp_sum[idx] / static_cast(num_elements_in_axis)); - } - } - return true; -} - -template -inline void Mean(const tflite::MeanParams& op_params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, T* output_data) { - ruy::profiler::ScopeLabel label("Mean4D"); - - // Current implementation only supports dimension equals 4 and simultaneous - // reduction over width and height. 
- TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); - TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - const int output_batch = output_shape.Dims(0); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int output_depth = output_shape.Dims(3); - - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - - TFLITE_CHECK_EQ(op_params.axis_count, 2); - TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1)); - TFLITE_CHECK_EQ(output_height, 1); - TFLITE_CHECK_EQ(output_width, 1); - - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - float value = 0; - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)]; - } - } - output_data[Offset(output_shape, out_b, 0, 0, out_d)] = - value / (input_width * input_height); - } - } -} - -inline void Mean(const tflite::MeanParams& op_params, - const RuntimeShape& unextended_input_shape, - const uint8_t* input_data, int32_t input_zero_point, - float input_scale, const RuntimeShape& unextended_output_shape, - uint8_t* output_data, int32_t output_zero_point, - float output_scale) { - ruy::profiler::ScopeLabel label("Mean4D/Uint8"); - - // Current implementation only supports dimension equals 4 and simultaneous - // reduction over width and height. 
- TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); - TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - const int output_batch = output_shape.Dims(0); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int output_depth = output_shape.Dims(3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const float num_elements_in_axis = input_width * input_height; - - TFLITE_CHECK_EQ(op_params.axis_count, 2); - TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1)); - TFLITE_CHECK_EQ(output_height, 1); - TFLITE_CHECK_EQ(output_width, 1); - - constexpr int32_t kMinValue = std::numeric_limits::min(); - constexpr int32_t kMaxValue = std::numeric_limits::max(); - - float temp = input_zero_point * input_scale / output_scale; - temp = temp > 0 ? temp + 0.5f : temp - 0.5f; - int32_t bias = output_zero_point - static_cast(temp); - double real_scale = - static_cast(input_scale / (num_elements_in_axis * output_scale)); - - int32_t multiplier; - int shift; - QuantizeMultiplier(real_scale, &multiplier, &shift); - for (int out_b = 0; out_b < output_batch; ++out_b) { - for (int out_d = 0; out_d < output_depth; ++out_d) { - int32_t acc = 0; - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)]; - } - } - acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); - acc += bias; - acc = std::min(std::max(acc, kMinValue), kMaxValue); - output_data[Offset(output_shape, out_b, 0, 0, out_d)] = - static_cast(acc); - } - } -} - -// Computes the mean of elements across dimensions given in axis. 
-// It does so in two stages, first calculates the sum of elements along the axis -// then divides it by the number of element in axis for quantized values. -template -inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point, - float input_scale, const int* input_dims, - const int input_num_dims, T* output_data, - int32_t output_zero_point, float output_scale, - const int* output_dims, - const int output_num_dims, const int* axis, - const int num_axis_dimensions, bool keep_dims, - int* temp_index, int* resolved_axis, U* temp_sum, - bool compute_sum) { - const bool uint8_case = std::is_same::value; - const bool int16_case = std::is_same::value; - if (uint8_case) { - ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8"); - } else if (int16_case) { - ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16"); - } else { - ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8"); - } - // Reset output data. - size_t num_outputs = 1; - for (int idx = 0; idx < output_num_dims; ++idx) { - size_t current = static_cast(output_dims[idx]); - // Overflow prevention. - if (num_outputs > std::numeric_limits::max() / current) { - return false; - } - num_outputs *= current; - } - for (size_t idx = 0; idx < num_outputs; ++idx) { - output_data[idx] = T(); - temp_sum[idx] = U(); - } - - // Return early when input shape has zero dim. This is done after initializing - // data for output tensor because there are cases that the input tensor is - // empty but output tensor is not. In that case, output tensor should be - // filled with init_value. - for (int i = 0; i < input_num_dims; ++i) { - if (input_dims[i] == 0) return true; - } - - // Resolve axis. 
- int num_resolved_axis = 0; - if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis, - &num_resolved_axis)) { - return false; - } - - if (!ReduceSumImpl(input_data, input_dims, output_dims, input_num_dims, - output_num_dims, resolved_axis, num_resolved_axis, - temp_index, temp_sum)) { - return false; - } - - // Calculate mean by dividing output_data by num of aggregated element. - size_t num_elements_in_axis = 1; - for (int idx = 0; idx < num_resolved_axis; ++idx) { - size_t current = static_cast(input_dims[resolved_axis[idx]]); - // Overflow prevention. - if (current > (std::numeric_limits::max() / num_elements_in_axis)) { - return false; - } - num_elements_in_axis *= current; - } - - if (num_elements_in_axis > 0) { - const float scale = input_scale / output_scale; - if (compute_sum) { - // TODO(b/116341117): Eliminate float and do this completely in 8bit. - const float bias = -input_zero_point * scale * num_elements_in_axis; - for (size_t idx = 0; idx < num_outputs; ++idx) { - const U value = - static_cast(TfLiteRound(temp_sum[idx] * scale + bias)) + - output_zero_point; - output_data[idx] = static_cast(value); - } - } else { - const float bias = -input_zero_point * scale; - for (size_t idx = 0; idx < num_outputs; ++idx) { - float float_mean = static_cast(temp_sum[idx]) / - static_cast(num_elements_in_axis); - float result = TfLiteMin( - TfLiteRound(float_mean * scale + bias) + output_zero_point, - static_cast(std::numeric_limits::max())); - result = TfLiteMax(result, - static_cast(std::numeric_limits::min())); - output_data[idx] = static_cast(result); - } - } - } - return true; -} - -template -inline bool QuantizedReduceProd(const T* input_data, int32_t input_zero_point, - const RuntimeShape& input_shape, T* output_data, - int32_t output_zero_point, - const RuntimeShape& output_shape, - const int* axis, - const int64_t num_axis_dimensions, - bool keep_dims, int* temp_index, - int* resolved_axis, int32_t* temp_prod, - int32_t 
scaling_multiplier, int scaling_shift) { - const int32_t kMinValue = std::numeric_limits::min(); - const int32_t kMaxValue = std::numeric_limits::max(); - - // Resolve axis. - int num_resolved_axis = 0; - if (!ResolveAxis(input_shape.DimensionsCount(), axis, num_axis_dimensions, - resolved_axis, &num_resolved_axis)) { - return false; - } - - // Calculate the reduced product by rescaling each multiplication step to - // avoid an overflow. - auto reducer_first = [&](T in) -> int32_t { return in - input_zero_point; }; - - auto reducer_next = [&](int32_t current, T in) -> int32_t { - const int64_t result = - static_cast(current) * (in - input_zero_point); - return MultiplyByQuantizedMultiplier(result, scaling_multiplier, - scaling_shift); - }; - - if (!Reduce( - input_data, input_shape.DimsData(), output_shape.DimsData(), - input_shape.DimensionsCount(), output_shape.DimensionsCount(), - resolved_axis, num_resolved_axis, temp_index, reducer_first, - reducer_next, temp_prod)) { - return false; - } - - for (int i = 0; i < output_shape.FlatSize(); i++) { - int32_t result = - MultiplyByQuantizedMultiplier(static_cast(temp_prod[i]), - scaling_multiplier, scaling_shift) + - output_zero_point; - result = std::min(std::max(result, kMinValue), kMaxValue); - output_data[i] = static_cast(result); - } - - return true; -} - -} // namespace reference_ops - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/requantize.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/requantize.h deleted file mode 100644 index f35f6fc8..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/requantize.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_ - -#include - -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -inline void Requantize(const input_type* input_data, int32_t size, - int32_t effective_scale_multiplier, - int32_t effective_scale_shift, int32_t input_zeropoint, - int32_t output_zeropoint, output_type* output_data) { - ruy::profiler::ScopeLabel label("Requantize"); - const bool same_scale = - (effective_scale_multiplier == 1 << 30 && effective_scale_shift == 1); - if (same_scale) { - const bool mixed_type_int8_uint8 = - std::is_same::value && - std::is_same::value; - const bool mixed_type_uint8_int8 = - std::is_same::value && - std::is_same::value; - const int32_t zero_point_diff = input_zeropoint - output_zeropoint; - // Fast path to do requantization for the case when just a shift of 128 is - // needed. 
- if ((mixed_type_int8_uint8 && zero_point_diff == -128) || - (mixed_type_uint8_int8 && zero_point_diff == 128)) { - for (int i = 0; i < size; ++i) { - output_data[i] = input_data[i] ^ 0x80; - } - return; - } - } - static constexpr int32_t kMinOutput = std::numeric_limits::min(); - static constexpr int32_t kMaxOutput = std::numeric_limits::max(); - for (int i = 0; i < size; ++i) { - const int32_t input = input_data[i] - input_zeropoint; - const int32_t output = - MultiplyByQuantizedMultiplier(input, effective_scale_multiplier, - effective_scale_shift) + - output_zeropoint; - const int32_t clamped_output = - std::max(std::min(output, kMaxOutput), kMinOutput); - output_data[i] = static_cast(clamped_output); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/resize_bilinear.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/resize_bilinear.h deleted file mode 100644 index b5edadb9..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/resize_bilinear.h +++ /dev/null @@ -1,228 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_ - -#include -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -inline void ComputeInterpolationValues(const float value, const float scale, - const bool half_pixel_centers, - int32_t input_size, float* scaled_value, - int32_t* lower_bound, - int32_t* upper_bound) { - if (half_pixel_centers) { - *scaled_value = (value + 0.5f) * scale - 0.5f; - } else { - *scaled_value = value * scale; - } - float scaled_value_floor = std::floor(*scaled_value); - *lower_bound = std::max(static_cast(scaled_value_floor), - static_cast(0)); - *upper_bound = - std::min(static_cast(std::ceil(*scaled_value)), input_size - 1); -} - -template -inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_size_shape, - const int32_t* output_size_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - // If half_pixel_centers is True, align_corners must be False. 
- TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners); - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_size_shape = - RuntimeShape::ExtendedShape(4, unextended_output_size_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - int32_t batches = MatchingDim(input_shape, 0, output_shape, 0); - int32_t input_height = input_shape.Dims(1); - int32_t input_width = input_shape.Dims(2); - int32_t depth = MatchingDim(input_shape, 3, output_shape, 3); - - TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2); - int32_t output_height = - output_size_data[Offset(output_size_shape, 0, 0, 0, 0)]; - int32_t output_width = - output_size_data[Offset(output_size_shape, 0, 0, 0, 1)]; - - float height_scale = static_cast(input_height) / output_height; - float width_scale = static_cast(input_width) / output_width; - if (op_params.align_corners && output_height > 1) { - height_scale = static_cast(input_height - 1) / (output_height - 1); - } - if (op_params.align_corners && output_width > 1) { - width_scale = static_cast(input_width - 1) / (output_width - 1); - } - const float rounding_offset = std::numeric_limits::is_integer ? 
.5f : .0f; - - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < output_height; ++y) { - float input_y; - int32_t y0, y1; - ComputeInterpolationValues(y, height_scale, op_params.half_pixel_centers, - input_height, &input_y, &y0, &y1); - for (int x = 0; x < output_width; ++x) { - float input_x; - int32_t x0, x1; - ComputeInterpolationValues(x, width_scale, op_params.half_pixel_centers, - input_width, &input_x, &x0, &x1); - for (int c = 0; c < depth; ++c) { - T interpolation = - static_cast(input_data[Offset(input_shape, b, y0, x0, c)] * - (1 - (input_y - y0)) * (1 - (input_x - x0)) + - input_data[Offset(input_shape, b, y1, x0, c)] * - (input_y - y0) * (1 - (input_x - x0)) + - input_data[Offset(input_shape, b, y0, x1, c)] * - (1 - (input_y - y0)) * (input_x - x0) + - input_data[Offset(input_shape, b, y1, x1, c)] * - (input_y - y0) * (input_x - x0) + - rounding_offset); - output_data[Offset(output_shape, b, y, x, c)] = interpolation; - } - } - } - } -} - -inline void ComputeInterpolationValuesInteger( - const int32_t value, const int32_t scale_10, const bool half_pixel_centers, - int32_t input_size, int32_t* scaled_value, int32_t* lower_bound, - int32_t* upper_bound) { - if (half_pixel_centers) { - *scaled_value = value * scale_10 + scale_10 / 2 - (1 << 9); - } else { - *scaled_value = value * scale_10; - } - constexpr int32_t zero = 0; - *lower_bound = std::max(*scaled_value / (1 << 10), zero); - *upper_bound = - std::min((*scaled_value + (1 << 10) - 1) / (1 << 10), input_size - 1); -} - -// Same as above but doesn't use any floating-point for the resize -template -inline void ResizeBilinearInteger( - const tflite::ResizeBilinearParams& op_params, - const RuntimeShape& unextended_input_shape, const T* input_data, - const RuntimeShape& unextended_output_size_shape, - const int32_t* output_size_data, - const RuntimeShape& unextended_output_shape, T* output_data) { - // If half_pixel_centers is True, align_corners must be False. 
- TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners); - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_size_shape = - RuntimeShape::ExtendedShape(4, unextended_output_size_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0); - const int32_t input_height = input_shape.Dims(1); - const int32_t input_width = input_shape.Dims(2); - const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3); - - TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1); - TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2); - const int32_t output_height = - output_size_data[Offset(output_size_shape, 0, 0, 0, 0)]; - const int32_t output_width = - output_size_data[Offset(output_size_shape, 0, 0, 0, 1)]; - - int32_t height_scale_10 = - ((1 << 10) * input_height + output_height / 2) / output_height; - int32_t width_scale_10 = - ((1 << 10) * input_width + output_width / 2) / output_width; - if (op_params.align_corners && output_height > 1) { - height_scale_10 = - ((1 << 10) * (input_height - 1) + (output_height - 1) / 2) / - (output_height - 1); - } - if (op_params.align_corners && output_width > 1) { - width_scale_10 = ((1 << 10) * (input_width - 1) + (output_width - 1) / 2) / - (output_width - 1); - } - - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < output_height; ++y) { - int32_t input_y, y0, y1; - ComputeInterpolationValuesInteger(y, height_scale_10, - op_params.half_pixel_centers, - input_height, &input_y, &y0, &y1); - for (int x = 0; x < output_width; ++x) { - int32_t 
input_x, x0, x1; - ComputeInterpolationValuesInteger(x, width_scale_10, - op_params.half_pixel_centers, - input_width, &input_x, &x0, &x1); - for (int c = 0; c < depth; ++c) { - const int64_t output_20_ll = - static_cast( - input_data[Offset(input_shape, b, y0, x0, c)]) * - ((1 << 10) - (input_y - (1 << 10) * y0)) * - ((1 << 10) - (input_x - (1 << 10) * x0)); - const int64_t output_20_lu = - static_cast( - input_data[Offset(input_shape, b, y1, x0, c)]) * - (input_y - (1 << 10) * y0) * - ((1 << 10) - (input_x - (1 << 10) * x0)); - const int64_t output_20_rl = - static_cast( - input_data[Offset(input_shape, b, y0, x1, c)]) * - ((1 << 10) - (input_y - (1 << 10) * y0)) * - (input_x - (1 << 10) * x0); - const int64_t output_20_ru = - static_cast( - input_data[Offset(input_shape, b, y1, x1, c)]) * - (input_y - (1 << 10) * y0) * (input_x - (1 << 10) * x0); - const int64_t output_20 = - output_20_ll + output_20_lu + output_20_rl + output_20_ru; - const int64_t round = (output_20 > 0) ? (1 << 19) : -(1 << 19); - const T interpolation = - static_cast((output_20 + round) / (1 << 20)); - output_data[Offset(output_shape, b, y, x, c)] = interpolation; - } - } - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h deleted file mode 100644 index bf0b757e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h +++ /dev/null @@ -1,102 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline int32_t GetNearestNeighbor(const int input_value, - const int32_t input_size, - const int32_t output_size, - const bool align_corners, - const bool half_pixel_centers) { - const float scale = - (align_corners && output_size > 1) - ? (input_size - 1) / static_cast(output_size - 1) - : input_size / static_cast(output_size); - const float offset = half_pixel_centers ? 0.5f : 0.0f; - int32_t output_value = std::min( - align_corners - ? 
static_cast(TfLiteRound((input_value + offset) * scale)) - : static_cast(std::floor((input_value + offset) * scale)), - input_size - 1); - if (half_pixel_centers) { - output_value = std::max(static_cast(0), output_value); - } - return output_value; -} - -template -inline void ResizeNearestNeighbor( - const tflite::ResizeNearestNeighborParams& op_params, - const RuntimeShape& unextended_input_shape, const T* input_data, - const RuntimeShape& output_size_shape, const int32_t* output_size_data, - const RuntimeShape& unextended_output_shape, T* output_data) { - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - int32_t batches = MatchingDim(input_shape, 0, output_shape, 0); - int32_t input_height = input_shape.Dims(1); - int32_t input_width = input_shape.Dims(2); - int32_t depth = MatchingDim(input_shape, 3, output_shape, 3); - - // The Tensorflow version of this op allows resize on the width and height - // axis only. 
- TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2); - int32_t output_height = output_size_data[0]; - int32_t output_width = output_size_data[1]; - - const int col_offset = input_shape.Dims(3); - const int row_offset = input_shape.Dims(2) * col_offset; - const int batch_offset = input_shape.Dims(1) * row_offset; - - const T* input_ptr = input_data; - T* output_ptr = output_data; - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < output_height; ++y) { - int32_t in_y = GetNearestNeighbor(y, input_height, output_height, - op_params.align_corners, - op_params.half_pixel_centers); - const T* y_input_ptr = input_ptr + in_y * row_offset; - for (int x = 0; x < output_width; ++x) { - int32_t in_x = GetNearestNeighbor(x, input_width, output_width, - op_params.align_corners, - op_params.half_pixel_centers); - const T* x_input_ptr = y_input_ptr + in_x * col_offset; - memcpy(output_ptr, x_input_ptr, depth * sizeof(T)); - output_ptr += depth; - } - } - input_ptr += batch_offset; - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/round.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/round.h deleted file mode 100644 index 9bd8f3f2..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/round.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_ - -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline float RoundToNearest(float value) { - auto floor_val = std::floor(value); - auto diff = value - floor_val; - if ((diff < 0.5f) || - ((diff == 0.5f) && (static_cast(floor_val) % 2 == 0))) { - return floor_val; - } else { - return floor_val = floor_val + 1.0f; - } -} - -inline void Round(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - // Note that this implementation matches that of tensorFlow tf.round - // and corresponds to the bankers rounding method. - // cfenv (for fesetround) is not yet supported universally on Android, so - // using a work around. - output_data[i] = RoundToNearest(input_data[i]); - } -} - -} // namespace reference_ops -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/slice.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/slice.h deleted file mode 100644 index cb73ea0d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/slice.h +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_ - -#include "tensorflow/lite/kernels/internal/portable_tensor.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -template -inline void Slice(const tflite::SliceParams& op_params, - const RuntimeShape& input_shape, - const RuntimeShape& output_shape, - SequentialTensorWriter* writer) { - const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape); - TFLITE_DCHECK_LE(op_params.begin_count, 5); - TFLITE_DCHECK_LE(op_params.size_count, 5); - const int begin_count = op_params.begin_count; - const int size_count = op_params.size_count; - // We front-pad the begin and size vectors. - int start[5]; - int stop[5]; - for (int i = 0; i < 5; ++i) { - int padded_i = 5 - i; - start[i] = - begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i]; - stop[i] = - (size_count < padded_i || op_params.size[size_count - padded_i] == -1) - ? 
ext_shape.Dims(i) - : start[i] + op_params.size[size_count - padded_i]; - } - - for (int i0 = start[0]; i0 < stop[0]; ++i0) { - for (int i1 = start[1]; i1 < stop[1]; ++i1) { - for (int i2 = start[2]; i2 < stop[2]; ++i2) { - for (int i3 = start[3]; i3 < stop[3]; ++i3) { - for (int i4 = start[4]; i4 < stop[4]; ++i4) { - writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4)); - } - } - } - } - } -} - -template -inline void Slice(const tflite::SliceParams& op_params, - const RuntimeShape& input_shape, const T* input_data, - const RuntimeShape& output_shape, T* output_data) { - SequentialTensorWriter writer(input_data, output_data); - return Slice(op_params, input_shape, output_shape, &writer); -} - -template -inline void Slice(const tflite::SliceParams& op_params, - const RuntimeShape& input_shape, const TfLiteTensor* input, - const RuntimeShape& output_shape, TfLiteTensor* output) { - SequentialTensorWriter writer(input, output); - return Slice(op_params, input_shape, output_shape, &writer); -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/softmax.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/softmax.h deleted file mode 100644 index 9f4b6398..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/softmax.h +++ /dev/null @@ -1,233 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_ - -#include -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/op_macros.h" - -namespace tflite { -namespace reference_ops { - -inline void Softmax(const SoftmaxParams& params, - const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - - for (int i = 0; i < outer_size; ++i) { - // Find max element value which we'll use to ensure numerical stability - // taking advantage of the following equality: - // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)) - float max = std::numeric_limits::lowest(); - for (int c = 0; c < depth; ++c) { - max = std::max(max, input_data[i * depth + c]); - } - - // Compute sum. - float sum = 0.f; - for (int c = 0; c < depth; ++c) { - const float exp_c = std::exp((input_data[i * depth + c] - max) * - static_cast(params.beta)); - output_data[i * depth + c] = exp_c; - sum += exp_c; - } - - // Compute result. - for (int c = 0; c < depth; ++c) { - output_data[i * depth + c] = output_data[i * depth + c] / sum; - } - } -} - -// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t -// output. 
-template -inline void Softmax(const SoftmaxParams& params, - const RuntimeShape& input_shape, const InputT* input_data, - const RuntimeShape& output_shape, OutputT* output_data) { - const int32_t input_beta_multiplier = params.input_multiplier; - const int32_t input_beta_left_shift = params.input_left_shift; - const int diff_min = params.diff_min; - // The representation chosen for the input to the exp() function is Q5.26. - // We need to leave extra space since values that we skip might be as large as - // -32 before multiplying by input_beta_multiplier, and therefore as large as - // -16 afterwards. Note that exp(-8) is definitely not insignificant to - // accumulation, but exp(-16) definitely is. - static const int kScaledDiffIntegerBits = 5; - static const int kAccumulationIntegerBits = 12; - using FixedPointScaledDiff = - gemmlowp::FixedPoint; - using FixedPointAccum = - gemmlowp::FixedPoint; - using FixedPoint0 = gemmlowp::FixedPoint; - - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - - for (int i = 0; i < outer_size; ++i) { - InputT max_in_row = std::numeric_limits::min(); - for (int c = 0; c < depth; ++c) { - max_in_row = std::max(max_in_row, input_data[i * depth + c]); - } - - FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); - for (int c = 0; c < depth; ++c) { - int32_t input_diff = - static_cast(input_data[i * depth + c]) - max_in_row; - if (input_diff >= diff_min) { - const int32_t input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - sum_of_exps = sum_of_exps + gemmlowp::Rescale( - exp_on_negative_values(scaled_diff_f8)); - } - } - - int num_bits_over_unit; - FixedPoint0 
shifted_scale = FixedPoint0::FromRaw(GetReciprocal( - sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit)); - - for (int c = 0; c < depth; ++c) { - int32_t input_diff = - static_cast(input_data[i * depth + c]) - max_in_row; - if (input_diff >= diff_min) { - const int32_t input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - - FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); - int32_t unsat_output = gemmlowp::RoundingDivideByPOT( - (shifted_scale * exp_in_0).raw(), - num_bits_over_unit + 31 - (sizeof(OutputT) * 8)); - - const int32_t shifted_output = - unsat_output + - static_cast(std::numeric_limits::min()); - - output_data[i * depth + c] = static_cast(std::max( - std::min(shifted_output, - static_cast(std::numeric_limits::max())), - static_cast(std::numeric_limits::min()))); - } else { - output_data[i * depth + c] = std::numeric_limits::min(); - } - } - } -} - -// Computes exp(input - max_input) -inline int16_t SoftMaxCalculateExp(const SoftmaxParams& params, - const int16_t* input_data, const int depth, - int16_t max_in_row, int i, int c) { - int32_t input_diff = input_data[i * depth + c] - max_in_row; - // scale the input_diff such that [-65535, 0] correspond to [-10.0, 0.0] - // exp lut generated with range [-10, 0], as exp(-10) is negligible. - int32_t scaled_diff = MultiplyByQuantizedMultiplier( - input_diff, params.input_multiplier, params.input_left_shift); - // recenter to [-32768, 32767] - int32_t sym_scaled_diff = scaled_diff + 32767; - int16_t sat_sym_scaled_diff = - std::min(std::max(sym_scaled_diff, static_cast(-32768)), - static_cast(32767)); - // apply the exp() LUT activation function - return lut_lookup(sat_sym_scaled_diff, params.exp_lut); -} -// Quantized softmax with int16_t input and int16_t output. 
-inline void SoftmaxInt16(const SoftmaxParams& params, - const RuntimeShape& input_shape, - const int16_t* input_data, - const RuntimeShape& output_shape, - int16_t* output_data) { - const int trailing_dim = input_shape.DimensionsCount() - 1; - const int outer_size = - MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int depth = - MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - - for (int i = 0; i < outer_size; ++i) { - // Find the largest element - int16_t max_in_row = std::numeric_limits::min(); - for (int c = 0; c < depth; ++c) { - max_in_row = std::max(max_in_row, input_data[i * depth + c]); - } - - // This loops computes the exp values and their sum. We will need the exp - // values later on in the function so we cache them in the output_data - // buffer. This is an optimization done to avoid calculating the exp values - // twice making use of the output_data buffer as scratch memory. - int32_t sum_of_exps = 0; // Q16.15 fixed point format. - int16_t* exp_results_Q015 = output_data + i * depth; - for (int c = 0; c < depth; ++c) { - exp_results_Q015[c] = - SoftMaxCalculateExp(params, input_data, depth, max_in_row, i, c); - sum_of_exps += exp_results_Q015[c]; - } - - // Compute the reciprocal 1/sum_of_exps - uint8_t headroom_plus_one = - CountLeadingZeros(static_cast(sum_of_exps)); - int32_t shifted_sum = - ((static_cast(sum_of_exps) << (headroom_plus_one - 1)) + - (1 << 13)) >> - 14; - // since the LUT computes 1/(1 + x) we need to first compute x = (sum - 1). - // also, the LUT expects a symmetrical input, so we must also recenter x - // from [0, 65535] to [-32768, 32767]. 
- int32_t sym_shifted_sum = shifted_sum + (-((1 << 15) + (1 << 16))); - int16_t sat_sym_shifted_sum = static_cast( - std::min(std::max(sym_shifted_sum, static_cast(-32768)), - static_cast(32767))); - // apply 1/(1 + x) LUT activation function - int16_t reciprocal_scale_Q015 = - lut_lookup(sat_sym_shifted_sum, params.one_over_one_plus_x_lut); - - // Rescale the exp_result with reciprocal - // range of output is [0, 32767] correspond to [0.0, 1.0] - for (int c = 0; c < depth; ++c) { - uint8_t right_shift = 31 - headroom_plus_one; - int64_t round = 1 << (right_shift - 1); - int32_t result = (static_cast(exp_results_Q015[c]) * - static_cast(reciprocal_scale_Q015) + - round) >> - right_shift; - output_data[i * depth + c] = static_cast( - std::min(std::max(result, static_cast(0)), - static_cast(32767))); - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h deleted file mode 100644 index 7f844152..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h +++ /dev/null @@ -1,109 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_ - -#include - -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -// TODO(b/135760455): Move this method anonymous namespace in a cc file. -inline RuntimeShape ExtendShapeSpaceToBatch(const RuntimeShape& shape) { - if (shape.DimensionsCount() == 4) { - return shape; - } - RuntimeShape new_shape(4, 1); - new_shape.SetDim(0, shape.Dims(0)); - new_shape.SetDim(1, shape.Dims(1)); - new_shape.SetDim(3, shape.Dims(2)); - return new_shape; -} - -template -inline void SpaceToBatchND(const SpaceToBatchParams& params, - const RuntimeShape& unextended_input1_shape, - const T* input1_data, - const RuntimeShape& unextended_input2_shape, - const int32_t* block_shape_data, - const RuntimeShape& unextended_input3_shape, - const int32_t* paddings_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - ruy::profiler::ScopeLabel label("SpaceToBatchND"); - TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3); - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(), - unextended_output_shape.DimensionsCount()); - - // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C. 
- const RuntimeShape input1_shape = - ExtendShapeSpaceToBatch(unextended_input1_shape); - const RuntimeShape output_shape = - ExtendShapeSpaceToBatch(unextended_output_shape); - - const int depth = input1_shape.Dims(3); - const int input_width = input1_shape.Dims(2); - const int input_height = input1_shape.Dims(1); - const int input_batch_size = input1_shape.Dims(0); - - const int output_width = output_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_batch_size = output_shape.Dims(0); - - const int block_shape_height = block_shape_data[0]; - const int block_shape_width = - unextended_input1_shape.DimensionsCount() == 4 ? block_shape_data[1] : 1; - const int padding_top = paddings_data[0]; - const int padding_left = - unextended_input1_shape.DimensionsCount() == 4 ? paddings_data[2] : 0; - - // For uint8 quantized, the correct padding "zero value" is the output offset. - const int32_t pad_value = params.output_offset; - for (int out_b = 0; out_b < output_batch_size; ++out_b) { - int input_batch = out_b % input_batch_size; - int shift_w = (out_b / input_batch_size) % block_shape_width; - int shift_h = (out_b / input_batch_size) / block_shape_width; - for (int out_h = 0; out_h < output_height; ++out_h) { - for (int out_w = 0; out_w < output_width; ++out_w) { - T* out = output_data + Offset(output_shape, out_b, out_h, out_w, 0); - if (out_h * block_shape_height + shift_h < padding_top || - out_h * block_shape_height + shift_h >= - padding_top + input_height || - out_w * block_shape_width + shift_w < padding_left || - out_w * block_shape_width + shift_w >= padding_left + input_width) { - // This may not execute correctly when pad_value != 0 and T != uint8. 
- memset(out, pad_value, depth * sizeof(T)); - } else { - const T* in = - input1_data + - Offset(input1_shape, input_batch, - (out_h * block_shape_height + shift_h) - padding_top, - (out_w * block_shape_width + shift_w) - padding_left, 0); - memcpy(out, in, depth * sizeof(T)); - } - } - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_BATCH_ND_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/space_to_depth.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/space_to_depth.h deleted file mode 100644 index 7ad46549..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/space_to_depth.h +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_ - -#include - -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace reference_ops { - -template -inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(4, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - const int input_depth = input_shape.Dims(3); - const int input_width = input_shape.Dims(2); - const int input_height = input_shape.Dims(1); - const int input_batch = input_shape.Dims(0); - - const int output_depth = output_shape.Dims(3); - const int output_width = output_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_batch = output_shape.Dims(0); - - const int32_t block_size = op_params.block_size; - - TFLITE_DCHECK_EQ(input_width, output_width * block_size); - TFLITE_DCHECK_EQ(input_height, output_height * block_size); - TFLITE_DCHECK_EQ(input_depth * block_size * block_size, output_depth); - TFLITE_DCHECK_EQ(input_batch, output_batch); - - for (int in_b = 0; in_b < input_batch; ++in_b) { - for (int in_h = 0; in_h < input_height; ++in_h) { - for (int in_w = 0; in_w < input_width; ++in_w) { - for (int in_d = 0; in_d < input_depth; ++in_d) { - const int out_d = - in_d + ((in_h % block_size) * block_size + in_w % block_size) * - input_depth; - const int out_w = in_w / block_size; - const int out_h = in_h / block_size; - const int out_b = in_b; - - const int 
input_index = Offset(input_shape, in_b, in_h, in_w, in_d); - const int output_index = - Offset(output_shape, out_b, out_h, out_w, out_d); - - output_data[output_index] = input_data[input_index]; - } - } - } - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/strided_slice.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/strided_slice.h deleted file mode 100644 index 40dc2e91..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/strided_slice.h +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ - -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/portable_tensor.h" -#include "tensorflow/lite/kernels/internal/strided_slice_logic.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const RuntimeShape& unextended_output_shape, - SequentialTensorWriter* writer) { - using strided_slice::LoopCondition; - using strided_slice::StartForAxis; - using strided_slice::StopForAxis; - - ruy::profiler::ScopeLabel label("StridedSlice"); - - // Note that the output_shape is not used herein. - tflite::StridedSliceParams params_copy = op_params; - - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 5); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 5); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(5, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(5, unextended_output_shape); - - // Reverse and pad to 5 dimensions because that is what the runtime code - // requires (ie. all shapes must be 5D and are given backwards). 
- strided_slice::StridedSlicePadIndices(¶ms_copy, 5); - - const int start_0 = StartForAxis(params_copy, input_shape, 0); - const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0); - const int start_1 = StartForAxis(params_copy, input_shape, 1); - const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1); - const int start_2 = StartForAxis(params_copy, input_shape, 2); - const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2); - const int start_3 = StartForAxis(params_copy, input_shape, 3); - const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3); - const int start_4 = StartForAxis(params_copy, input_shape, 4); - const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4); - - for (int offset_0 = start_0 * input_shape.Dims(1), - end_0 = stop_0 * input_shape.Dims(1), - step_0 = params_copy.strides[0] * input_shape.Dims(1); - !LoopCondition(offset_0, end_0, params_copy.strides[0]); - offset_0 += step_0) { - for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2), - end_1 = (offset_0 + stop_1) * input_shape.Dims(2), - step_1 = params_copy.strides[1] * input_shape.Dims(2); - !LoopCondition(offset_1, end_1, params_copy.strides[1]); - offset_1 += step_1) { - for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3), - end_2 = (offset_1 + stop_2) * input_shape.Dims(3), - step_2 = params_copy.strides[2] * input_shape.Dims(3); - !LoopCondition(offset_2, end_2, params_copy.strides[2]); - offset_2 += step_2) { - for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4), - end_3 = (offset_2 + stop_3) * input_shape.Dims(4), - step_3 = params_copy.strides[3] * input_shape.Dims(4); - !LoopCondition(offset_3, end_3, params_copy.strides[3]); - offset_3 += step_3) { - for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4; - !LoopCondition(offset_4, end_4, params_copy.strides[4]); - offset_4 += params_copy.strides[4]) { - writer->Write(offset_4); - } - } - } - } - } -} - -template -inline void 
StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - SequentialTensorWriter writer(input_data, output_data); - StridedSlice(op_params, unextended_input_shape, unextended_output_shape, - &writer); -} - -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const TfLiteTensor* input, - const RuntimeShape& unextended_output_shape, - TfLiteTensor* output) { - SequentialTensorWriter writer(input, output); - StridedSlice(op_params, unextended_input_shape, unextended_output_shape, - &writer); -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/sub.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/sub.h deleted file mode 100644 index d0ebc95a..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/sub.h +++ /dev/null @@ -1,479 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_ - -#include - -#include -#include - -#include "ruy/profiler/instrumentation.h" // from @ruy -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline void SubNonBroadcast(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const float* input1_data, - const RuntimeShape& input2_shape, - const float* input2_data, - const RuntimeShape& output_shape, - float* output_data) { - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = ActivationFunctionWithMinMax( - input1_data[i] - input2_data[i], params.float_activation_min, - params.float_activation_max); - } -} - -inline void SubNonBroadcast(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int32_t* input1_data, - const RuntimeShape& input2_shape, - const int32_t* input2_data, - const RuntimeShape& output_shape, - int32_t* output_data) { - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - output_data[i] = ActivationFunctionWithMinMax( - input1_data[i] - input2_data[i], params.quantized_activation_min, - params.quantized_activation_max); - } -} - -// TODO(b/151345304): We can implement BroadcastSub on buffers of arbitrary -// dimensionality if the runtime code does a single loop over one dimension -// that handles broadcasting as the base case. The code generator would then -// generate max(D1, D2) nested for loops. 
-template -inline void BroadcastSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const float* input1_data, - const RuntimeShape& input2_shape, - const float* input2_data, - const RuntimeShape& output_shape, - float* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/float"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] - - input2_data[SubscriptToIndex(desc2, indexes)], - params.float_activation_min, params.float_activation_max); - }; - NDOpsHelper(output_desc, sub_func); -} - -template -inline void BroadcastSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int32_t* input1_data, - const RuntimeShape& input2_shape, - const int32_t* input2_data, - const RuntimeShape& output_shape, - int32_t* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] - - input2_data[SubscriptToIndex(desc2, indexes)], - params.quantized_activation_min, params.quantized_activation_max); - }; - NDOpsHelper(output_desc, sub_func); -} - -template -void BroadcastSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int64_t* input1_data, - const RuntimeShape& input2_shape, - const int64_t* input2_data, - const RuntimeShape& output_shape, int64_t* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] - - input2_data[SubscriptToIndex(desc2, indexes)], - params.int64_activation_min, params.int64_activation_max); - }; - NDOpsHelper(output_desc, sub_func); -} - -template -void BroadcastSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/templated"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - ActivationFunctionWithMinMax( - input1_data[SubscriptToIndex(desc1, indexes)] - - input2_data[SubscriptToIndex(desc2, indexes)], - params.quantized_activation_min, params.quantized_activation_max); - }; - NDOpsHelper(output_desc, sub_func); -} - -template -inline void BroadcastSub16POTSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int16_t* input1_data, - const RuntimeShape& input2_shape, - const int16_t* input2_data, - const RuntimeShape& output_shape, - int16_t* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSub16POTSlow/int16_t"); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- auto sub_func = [&](int indexes[N]) { - const int32_t input1_val = input1_data[SubscriptToIndex(desc1, indexes)]; - const int32_t input2_val = input2_data[SubscriptToIndex(desc2, indexes)]; - const int32_t scaled_input1_val = - gemmlowp::RoundingDivideByPOT(input1_val, -params.input1_shift); - const int32_t scaled_input2_val = - gemmlowp::RoundingDivideByPOT(input2_val, -params.input2_shift); - const int32_t raw_output = scaled_input1_val - scaled_input2_val; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[SubscriptToIndex(output_desc, indexes)] = - static_cast(clamped_output); - }; - NDOpsHelper(output_desc, sub_func); -} - -template -void BroadcastQuantSubSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const T* input1_data, - const RuntimeShape& input2_shape, - const T* input2_data, - const RuntimeShape& output_shape, T* output_data) { - ruy::profiler::ScopeLabel label("BroadcastQuantSubSlow/T"); - TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); - TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); - NdArrayDesc desc1; - NdArrayDesc desc2; - NdArrayDesc output_desc; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. 
- // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. - auto sub_func = [&](int indexes[N]) { - const int32_t input1_val = - params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)]; - const int32_t input2_val = - params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)]; - const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); - const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32_t raw_sub = scaled_input1_val - scaled_input2_val; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sub, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[SubscriptToIndex(output_desc, indexes)] = - static_cast(clamped_output); - }; - NDOpsHelper(output_desc, sub_func); -} - -// Element-wise add that can often be used for inner loop of broadcast add as -// well as the non-broadcast add. 
-template -inline void SubElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { - for (int i = 0; i < size; ++i) { - const int32_t input1_val = params.input1_offset + input1_data[i]; - const int32_t input2_val = params.input2_offset + input2_data[i]; - const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); - const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32_t raw_sub = scaled_input1_val - scaled_input2_val; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - raw_sub, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); - } -} - -inline void Sub(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const uint8_t* input1_data, - const RuntimeShape& input2_shape, const uint8_t* input2_data, - const RuntimeShape& output_shape, uint8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - TFLITE_DCHECK_GT(params.input1_offset, -256); - TFLITE_DCHECK_GT(params.input2_offset, -256); - TFLITE_DCHECK_LT(params.input1_offset, 256); - TFLITE_DCHECK_LT(params.input2_offset, 256); - SubElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -inline void Sub(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const int8_t* 
input1_data, - const RuntimeShape& input2_shape, const int8_t* input2_data, - const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - TFLITE_DCHECK_GE(params.input1_offset, -128); - TFLITE_DCHECK_GE(params.input2_offset, -128); - // offset = -quantization_params.zero_point in PrepareGeneralSubOp(). - // So it's maximum can be 128 not 127. - TFLITE_DCHECK_LE(params.input1_offset, 128); - TFLITE_DCHECK_LE(params.input2_offset, 128); - SubElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -inline void Sub(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const int16_t* input1_data, - const RuntimeShape& input2_shape, const int16_t* input2_data, - const RuntimeShape& output_shape, int16_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - TFLITE_DCHECK_EQ(params.input1_offset, 0); - TFLITE_DCHECK_EQ(params.input2_offset, 0); - SubElementwise(flat_size, params, input1_data, input2_data, output_data); -} - -template -void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape, - const T* input1_data, const RuntimeShape& input2_shape, - const T* input2_data, const RuntimeShape& output_shape, - T* output_data) { - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); - const RuntimeShape extended_output_shape = - RuntimeShape::ExtendedShape(4, output_shape); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 
element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. - for (int b = 0; b < extended_output_shape.Dims(0); ++b) { - for (int y = 0; y < extended_output_shape.Dims(1); ++y) { - for (int x = 0; x < extended_output_shape.Dims(2); ++x) { - for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - output_data[Offset(extended_output_shape, b, y, x, c)] = - input1_data[SubscriptToIndex(desc1, b, y, x, c)] - - input2_data[SubscriptToIndex(desc2, b, y, x, c)]; - } - } - } - } -} - -inline void SetActivationMinMax(const ArithmeticParams& params, - int32_t* activation_min, - int32_t* activation_max) { - *activation_min = params.quantized_activation_min; - *activation_max = params.quantized_activation_max; -} - -inline void SetActivationMinMax(const ArithmeticParams& params, - float* activation_min, float* activation_max) { - *activation_min = params.float_activation_min; - *activation_max = params.float_activation_max; -} - -inline void SetActivationMinMax(const ArithmeticParams& params, - int64_t* activation_min, - int64_t* activation_max) { - *activation_min = params.int64_activation_min; - *activation_max = params.int64_activation_max; -} - -template -inline void SubWithActivation( - const ArithmeticParams& params, const RuntimeShape& input1_shape, - const T* input1_data, const RuntimeShape& input2_shape, - const T* input2_data, const RuntimeShape& output_shape, T* output_data) { - ruy::profiler::ScopeLabel label("SubWithActivation"); - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - T activation_min, activation_max; - SetActivationMinMax(params, &activation_min, &activation_max); - - for (int i = 0; i < flat_size; ++i) { - output_data[i] = 
ActivationFunctionWithMinMax( - input1_data[i] - input2_data[i], activation_min, activation_max); - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/tanh.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/tanh.h deleted file mode 100644 index 3a05c474..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/tanh.h +++ /dev/null @@ -1,129 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_ - -#include - -#include "fixedpoint/fixedpoint.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/op_macros.h" - -namespace tflite { -namespace reference_ops { - -inline void Tanh(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; i++) { - float val = input_data[i]; - float result = std::tanh(val); - output_data[i] = result; - } -} - -// Convenience version that allows, for example, generated-code calls to be -// uniform between data types. -inline void Tanh(const TanhParams&, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& output_shape, - float* output_data) { - // Drop params: not needed. - Tanh(input_shape, input_data, output_shape, output_data); -} - -inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, - const int16_t* input_data, const RuntimeShape& output_shape, - int16_t* output_data) { - const int input_left_shift = params.input_left_shift; - // Support for shifts is limited until we have a parameterized version of - // SaturatingRoundingMultiplyByPOT(). - TFLITE_DCHECK_GE(input_left_shift, 0); - TFLITE_DCHECK_LE(input_left_shift, 1); - - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - // F0 uses 0 integer bits, range [-1, 1]. - // This is the return type of math functions such as tanh, logistic, - // whose range is in [-1, 1]. - using F0 = gemmlowp::FixedPoint; - // F3 uses 3 integer bits, range [-8, 8], the input range expected here. 
- using F3 = gemmlowp::FixedPoint; - - if (input_left_shift == 0) { - for (int i = 0; i < flat_size; i++) { - F3 input = F3::FromRaw(input_data[i]); - F0 output = gemmlowp::tanh(input); - output_data[i] = output.raw(); - } - } else { - for (int i = 0; i < flat_size; i++) { - F3 input = F3::FromRaw( - gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i])); - F0 output = gemmlowp::tanh(input); - output_data[i] = output.raw(); - } - } -} - -inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& output_shape, - uint8_t* output_data) { - const int32_t input_zero_point = params.input_zero_point; - const int32_t input_range_radius = params.input_range_radius; - const int32_t input_multiplier = params.input_multiplier; - const int input_left_shift = params.input_left_shift; - const int32_t output_zero_point = 128; - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; i++) { - const uint8_t input_val_u8 = input_data[i]; - const int32_t input_val_centered = - static_cast(input_val_u8) - input_zero_point; - uint8_t output_val; - if (input_val_centered <= -input_range_radius) { - output_val = 0; - } else if (input_val_centered >= input_range_radius) { - output_val = 255; - } else { - const int32_t input_val_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_val_centered, input_multiplier, input_left_shift); - using FixedPoint4 = gemmlowp::FixedPoint; - using FixedPoint0 = gemmlowp::FixedPoint; - const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled); - const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4); - // Convert from Q0.31 to Q24.7. - using gemmlowp::RoundingDivideByPOT; - int32_t output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24); - output_val_s32 += output_zero_point; - if (output_val_s32 == 256) { - output_val_s32 = 255; - } - // Reinterpret as Q0.7, encoded in uint8_t. 
- TFLITE_DCHECK_GE(output_val_s32, 0); - TFLITE_DCHECK_LE(output_val_s32, 255); - output_val = static_cast(output_val_s32); - } - output_data[i] = output_val; - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/transpose.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/transpose.h deleted file mode 100644 index 96aa4cca..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/transpose.h +++ /dev/null @@ -1,111 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_ - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -template -void TransposeImpl(const TransposeParams& params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - const int unextended_input_size = unextended_input_shape.DimensionsCount(); - const int unextended_output_size = unextended_output_shape.DimensionsCount(); - TFLITE_DCHECK_LE(unextended_input_size, N); - TFLITE_DCHECK_LE(unextended_output_size, N); - TFLITE_DCHECK_EQ(unextended_output_size, params.perm_count); - const int input_ext_size = N - unextended_input_size; - const int output_ext_size = N - unextended_output_size; - NdArrayDesc input_desc; - NdArrayDesc output_desc; - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape), - &input_desc); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), - &output_desc); - - // The perm data is extended to match the output, each index incremented by - // the amount of front padding of the input shape. - int extended_perm[N]; - for (int i = 0; i < N; ++i) { - extended_perm[i] = i < output_ext_size - ? i - : params.perm[i - output_ext_size] + input_ext_size; - } - - // Permutes the input shape so we don't need to permute the indexes inside - // the loop. Check to make sure output_dims is matching input_dims. 
- NdArrayDesc perm_input_desc; - for (int k = 0; k < N; ++k) { - TFLITE_DCHECK_EQ(input_desc.extents[extended_perm[k]], - output_desc.extents[k]); - perm_input_desc.extents[k] = input_desc.extents[extended_perm[k]]; - perm_input_desc.strides[k] = input_desc.strides[extended_perm[k]]; - } - - // Naive transpose loop (iterate on output index and compute input index). - auto tranpose_func = [&](int indexes[N]) { - output_data[SubscriptToIndex(output_desc, indexes)] = - input_data[SubscriptToIndex(perm_input_desc, indexes)]; - }; - NDOpsHelper(output_desc, tranpose_func); -} - -template -void Transpose(const TransposeParams& params, - const RuntimeShape& unextended_input_shape, const T* input_data, - const RuntimeShape& unextended_output_shape, T* output_data) { - // Transpose kernel only does rearranging values not numeric evaluations on - // each cell. It's safe to implement per size of scalar type and this trick - // keeps the total code size in a reasonable range. - switch (sizeof(T)) { - case 1: - TransposeImpl(params, unextended_input_shape, - reinterpret_cast(input_data), - unextended_output_shape, - reinterpret_cast(output_data)); - break; - case 2: - TransposeImpl(params, unextended_input_shape, - reinterpret_cast(input_data), - unextended_output_shape, - reinterpret_cast(output_data)); - break; - - case 4: - TransposeImpl(params, unextended_input_shape, - reinterpret_cast(input_data), - unextended_output_shape, - reinterpret_cast(output_data)); - break; - case 8: - TransposeImpl(params, unextended_input_shape, - reinterpret_cast(input_data), - unextended_output_shape, - reinterpret_cast(output_data)); - break; - } -} - -} // namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/transpose_conv.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/transpose_conv.h deleted file mode 100644 index 
ac91f379..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/reference/transpose_conv.h +++ /dev/null @@ -1,219 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_ - -#include - -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -namespace reference_ops { - -inline void TransposeConv( - const ConvParams& params, const RuntimeShape& input_shape, - const float* input_data, const RuntimeShape& filter_shape, - const float* filter_data, const RuntimeShape& bias_shape, - const float* bias_data, const RuntimeShape& output_shape, - float* output_data, const RuntimeShape& im2col_shape, float* im2col_data) { - const int stride_width = params.stride_width; - const int stride_height = params.stride_height; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - (void)im2col_data; // only used in optimized code. - (void)im2col_shape; // only used in optimized code. 
- - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - - // Although transpose convolution simplifies to convolution with transposed - // weights for strides of 1, non-unitary striding complicates matters. To - // keep this reference implementation as clear as possible, we use a - // "scatter" access pattern, where we loop through all the input elements, - // computing their influence on the output, rather than looping through the - // output elements in the typical "gather" access pattern of a conv. We - // therefore must initialize the output array to zero. - const int num_elements = output_shape.FlatSize(); - for (int i = 0; i < num_elements; i++) { - output_data[i] = 0.0f; - } - - // Loop through input elements one at a time. 
- for (int batch = 0; batch < batches; ++batch) { - for (int in_y = 0; in_y < input_height; ++in_y) { - for (int in_x = 0; in_x < input_width; ++in_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - // Loop through the output elements it will influence - const int out_x_origin = (in_x * stride_width) - pad_width; - const int out_y_origin = (in_y * stride_height) - pad_height; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int out_channel = 0; out_channel < output_depth; - ++out_channel) { - // Compute output element location - const int out_x = out_x_origin + filter_x; - const int out_y = out_y_origin + filter_y; - // We cannot accumulate out of bounds - if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && - (out_y < output_height)) { - float input_value = input_data[Offset( - input_shape, batch, in_y, in_x, in_channel)]; - float filter_value = - filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; - output_data[Offset(output_shape, batch, out_y, out_x, - out_channel)] += - input_value * filter_value; - } - } - } - } - } - } - } - } - if (bias_data) { - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - output_data[Offset(output_shape, batch, out_y, out_x, - out_channel)] += bias_data[out_channel]; - } - } - } - } - } -} - -inline void TransposeConv( - const ConvParams& params, const RuntimeShape& input_shape, - const uint8_t* input_data, const RuntimeShape& filter_shape, - const uint8_t* filter_data, const RuntimeShape& bias_shape, - const int32_t* bias_data, const RuntimeShape& output_shape, - uint8_t* output_data, const RuntimeShape& im2col_shape, - uint8_t* im2col_data, int32_t* scratch_buffer) { - const int stride_width = 
params.stride_width; - const int stride_height = params.stride_height; - const int pad_width = params.padding_values.width; - const int pad_height = params.padding_values.height; - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - (void)im2col_data; // only used in optimized code. - (void)im2col_shape; // only used in optimized code. - - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); - const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int filter_height = filter_shape.Dims(1); - const int filter_width = filter_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - const int32_t input_offset = params.input_offset; - const int32_t filter_offset = params.weights_offset; - const int32_t output_offset = params.output_offset; - const int32_t output_multiplier = params.output_multiplier; - const int output_shift = params.output_shift; - const int32_t output_activation_min = params.quantized_activation_min; - const int32_t output_activation_max = params.quantized_activation_max; - TFLITE_DCHECK_LE(output_activation_min, output_activation_max); - if (bias_data) { - TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); - } - - const int num_elements = output_shape.FlatSize(); - // We need to initialize scratch_buffer to all 0s, as we apply the same - // 'scatter' based trick as in float version. - memset(scratch_buffer, 0, num_elements * sizeof(int32_t)); - - // Loop through input elements one at a time. 
- for (int batch = 0; batch < batches; ++batch) { - for (int in_y = 0; in_y < input_height; ++in_y) { - for (int in_x = 0; in_x < input_width; ++in_x) { - for (int in_channel = 0; in_channel < input_depth; ++in_channel) { - // Loop through the output elements it will influence. - const int out_x_origin = (in_x * stride_width) - pad_width; - const int out_y_origin = (in_y * stride_height) - pad_height; - for (int filter_y = 0; filter_y < filter_height; ++filter_y) { - for (int filter_x = 0; filter_x < filter_width; ++filter_x) { - for (int out_channel = 0; out_channel < output_depth; - ++out_channel) { - // Compute output element location. - const int out_x = out_x_origin + filter_x; - const int out_y = out_y_origin + filter_y; - // We cannot accumulate out of bounds. - if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && - (out_y < output_height)) { - uint8_t input_value = input_data[Offset( - input_shape, batch, in_y, in_x, in_channel)]; - uint8_t filter_value = - filter_data[Offset(filter_shape, out_channel, filter_y, - filter_x, in_channel)]; - scratch_buffer[Offset(output_shape, batch, out_y, out_x, - out_channel)] += - (input_value + input_offset) * - (filter_value + filter_offset); - } - } - } - } - } - } - } - } - for (int batch = 0; batch < batches; ++batch) { - for (int out_y = 0; out_y < output_height; ++out_y) { - for (int out_x = 0; out_x < output_width; ++out_x) { - for (int out_channel = 0; out_channel < output_depth; ++out_channel) { - int32_t acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x, - out_channel)]; - if (bias_data) { - acc += bias_data[out_channel]; - } - int32_t scaled_acc = MultiplyByQuantizedMultiplier( - acc, output_multiplier, output_shift); - scaled_acc += output_offset; - scaled_acc = std::max(scaled_acc, output_activation_min); - scaled_acc = std::min(scaled_acc, output_activation_max); - output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - static_cast(scaled_acc); - } - } - } - } -} - -} // 
namespace reference_ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/runtime_shape.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/runtime_shape.h deleted file mode 100644 index c2678b57..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/runtime_shape.h +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_ - -namespace tflite { - -template -struct Dims { - int sizes[N]; - int strides[N]; -}; - -class RuntimeShape { - public: - RuntimeShape& operator=(RuntimeShape const&) = delete; - - // RuntimeShape in TFLM supports up to 5 dimensions. - // The name kMaxSmallSize comes from the same file of the upstream - // tensorflow lite repo and need to be kept the same for max reuse. 
- static constexpr int kMaxSmallSize = 5; - - RuntimeShape() : size_(0) {} - - explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {} - - RuntimeShape(int shape_size, int32_t value) : size_(shape_size) { - for (int i = 0; i < shape_size; ++i) { - SetDim(i, value); - } - } - - RuntimeShape(int dimensions_count, const int32_t* dims_data) - : size_(dimensions_count) { - ReplaceWith(dimensions_count, dims_data); - } - - bool operator==(const RuntimeShape& comp) const { - return this->size_ == comp.size_ && - std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) == - 0; - } - - ~RuntimeShape() {} - - int32_t DimensionsCount() const { return size_; } - int32_t Dims(int i) const { - TFLITE_DCHECK_GE(i, 0); - TFLITE_DCHECK_LT(i, size_); - return dims_[i]; - } - void SetDim(int i, int32_t val) { - TFLITE_DCHECK_GE(i, 0); - TFLITE_DCHECK_LT(i, size_); - dims_[i] = val; - } - - static RuntimeShape ExtendedShape(int new_shape_size, - const RuntimeShape& shape) { - return RuntimeShape(new_shape_size, shape, 1); - } - int32_t* DimsData() { return dims_; } - const int32_t* DimsData() const { return dims_; } - const int32_t* DimsDataUpTo5D() const { return dims_; } - - void ReplaceWith(int dimensions_count, const int32_t* dims_data) { - size_ = dimensions_count; - int32_t* dst_dims = DimsData(); - std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t)); - } - - // Returns the total count of elements, that is the size when flattened into a - // vector. - int FlatSize() const { - int buffer_size = 1; - const int* dims_data = reinterpret_cast(DimsData()); - for (int i = 0; i < size_; i++) { - buffer_size *= dims_data[i]; - } - return buffer_size; - } - - private: - // For use only by ExtendedShape(), written to guarantee (return-value) copy - // elision in C++17. - // This creates a shape padded to the desired size with the specified value. 
- RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value) - : size_(new_shape_size) { - // If the following check fails, it is likely because a 4D-only kernel is - // being used with an array of larger dimension count. - TFLITE_CHECK_GE(new_shape_size, shape.DimensionsCount()); - const int size_increase = new_shape_size - shape.DimensionsCount(); - for (int i = 0; i < size_increase; ++i) { - SetDim(i, pad_value); - } - std::memcpy(DimsData() + size_increase, shape.DimsData(), - sizeof(int32_t) * shape.DimensionsCount()); - } - - int32_t size_; - union { - int32_t dims_[kMaxSmallSize]; - }; -}; - -// Since tensors with '0' in their shape are valid in TF, these offset functions -// allow that as long as the corresponding index is also 0. It is upto the -// calling ops to ensure that they perform verification checks on tensor shapes -// if they don't support a particular behavior. - -inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) { - TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4); - const int* dims_data = reinterpret_cast(shape.DimsData()); - TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) || - (i0 >= 0 && i0 < dims_data[0])); - TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) || - (i1 >= 0 && i1 < dims_data[1])); - TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) || - (i2 >= 0 && i2 < dims_data[2])); - TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) || - (i3 >= 0 && i3 < dims_data[3])); - return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3; -} - -inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3, - int i4) { - TFLITE_DCHECK_EQ(shape.DimensionsCount(), 5); - const int* dims_data = reinterpret_cast(shape.DimsData()); - TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) || - (i0 >= 0 && i0 < dims_data[0])); - TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) || - (i1 >= 0 && i1 < dims_data[1])); - TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) || - (i2 >= 0 && i2 < dims_data[2])); - 
TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) || - (i3 >= 0 && i3 < dims_data[3])); - TFLITE_DCHECK((dims_data[4] == 0 && i4 == 0) || - (i4 >= 0 && i4 < dims_data[4])); - return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) * - dims_data[4] + - i4; -} - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/strided_slice_logic.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/strided_slice_logic.h deleted file mode 100644 index bfe84050..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/strided_slice_logic.h +++ /dev/null @@ -1,211 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_ - -#include -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { -namespace strided_slice { - -// Use until std::clamp() is available from C++17. 
-inline int Clamp(const int v, const int lo, const int hi) { - TFLITE_DCHECK(!(hi < lo)); - if (hi < v) return hi; - if (v < lo) return lo; - return v; -} - -inline void StridedSlicePadIndices(tflite::StridedSliceParams* p, - int dim_count) { - // Add indices and mask bits to fully include extra dimensions - TFLITE_CHECK_LE(dim_count, 5); - TFLITE_CHECK_GE(dim_count, p->start_indices_count); - TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count); - TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count); - - const int pad_count = dim_count - p->start_indices_count; - - // Pad indices at start, so move arrays by pad_count. - for (int i = p->start_indices_count - 1; i >= 0; --i) { - p->strides[i + pad_count] = p->strides[i]; - p->start_indices[i + pad_count] = p->start_indices[i]; - p->stop_indices[i + pad_count] = p->stop_indices[i]; - } - for (int i = 0; i < pad_count; ++i) { - p->start_indices[i] = 0; - p->stop_indices[i] = 1; - p->strides[i] = 1; - } - - // Pad masks with 0s or 1s as required. - p->shrink_axis_mask <<= pad_count; - p->ellipsis_mask <<= pad_count; - p->new_axis_mask <<= pad_count; - p->begin_mask <<= pad_count; - p->end_mask <<= pad_count; - p->begin_mask |= (1 << pad_count) - 1; - p->end_mask |= (1 << pad_count) - 1; - - p->start_indices_count = dim_count; - p->stop_indices_count = dim_count; - p->strides_count = dim_count; -} - -// Return the index for the first element along that axis. This index will be a -// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0) -// that can be used to index directly into the data. -inline int StartForAxis(const tflite::StridedSliceParams& params, - const RuntimeShape& input_shape, int axis) { - const auto begin_mask = params.begin_mask; - const auto* start_indices = params.start_indices; - const auto* strides = params.strides; - const int axis_size = input_shape.Dims(axis); - if (axis_size == 0) { - return 0; - } - // Begin with the specified index. 
- int start = start_indices[axis]; - - // begin_mask override - if (begin_mask & 1 << axis) { - if (strides[axis] > 0) { - // Forward iteration - use the first element. These values will get - // clamped below (Note: We could have set them to 0 and axis_size-1, but - // use lowest() and max() to maintain symmetry with StopForAxis()) - start = std::numeric_limits::lowest(); - } else { - // Backward iteration - use the last element. - start = std::numeric_limits::max(); - } - } - - // Handle negative indices - if (start < 0) { - start += axis_size; - } - - // Clamping - if (strides[axis] > 0) { - // Forward iteration - start = Clamp(start, 0, axis_size); - } else { - // Backward iteration - start = Clamp(start, -1, axis_size - 1); - } - - return start; -} - -// Return the "real" index for the end of iteration along that axis. This is an -// "end" in the traditional C sense, in that it points to one past the last -// element. ie. So if you were iterating through all elements of a 1D array of -// size 4, this function would return 4 as the stop, because it is one past the -// "real" indices of 0, 1, 2 & 3. -inline int StopForAxis(const tflite::StridedSliceParams& params, - const RuntimeShape& input_shape, int axis, - int start_for_axis) { - const auto end_mask = params.end_mask; - const auto shrink_axis_mask = params.shrink_axis_mask; - const auto* stop_indices = params.stop_indices; - const auto* strides = params.strides; - const int axis_size = input_shape.Dims(axis); - if (axis_size == 0) { - return 0; - } - - // Begin with the specified index - const bool shrink_axis = shrink_axis_mask & (1 << axis); - int stop = stop_indices[axis]; - - // When shrinking an axis, the end position does not matter (and can be - // incorrect when negative indexing is used, see Issue #19260). Always use - // start_for_axis + 1 to generate a length 1 slice, since start_for_axis has - // already been adjusted for negative indices. 
- if (shrink_axis) { - return start_for_axis + 1; - } - - // end_mask override - if (end_mask & (1 << axis)) { - if (strides[axis] > 0) { - // Forward iteration - use the last element. These values will get - // clamped below - stop = std::numeric_limits::max(); - } else { - // Backward iteration - use the first element. - stop = std::numeric_limits::lowest(); - } - } - - // Handle negative indices - if (stop < 0) { - stop += axis_size; - } - - // Clamping - // Because the end index points one past the last element, we need slightly - // different clamping ranges depending on the direction. - if (strides[axis] > 0) { - // Forward iteration - stop = Clamp(stop, 0, axis_size); - } else { - // Backward iteration - stop = Clamp(stop, -1, axis_size - 1); - } - - return stop; -} - -inline bool LoopCondition(int index, int stop, int stride) { - // True when we have reached the end of an axis and should loop. - return stride > 0 ? index >= stop : index <= stop; -} - -inline tflite::StridedSliceParams BuildStridedSliceParams( - int begin_mask, int end_mask, int shrink_axis_mask, - const std::vector& start_indices, const std::vector& stop_indices, - const std::vector& strides) { - tflite::StridedSliceParams op_params; - const int dims_count = start_indices.size(); - - op_params.start_indices_count = dims_count; - op_params.stop_indices_count = dims_count; - op_params.strides_count = dims_count; - for (int i = 0; i < dims_count; ++i) { - op_params.start_indices[i] = start_indices[i]; - op_params.stop_indices[i] = stop_indices[i]; - op_params.strides[i] = strides[i]; - } - - op_params.begin_mask = begin_mask; - op_params.ellipsis_mask = 0; - op_params.end_mask = end_mask; - op_params.new_axis_mask = 0; - op_params.shrink_axis_mask = shrink_axis_mask; - - return op_params; -} - -} // namespace strided_slice - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_ diff --git 
a/code/components/tflite-lib/tensorflow/lite/kernels/internal/tensor_ctypes.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/tensor_ctypes.h deleted file mode 100644 index f1d3e17f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/tensor_ctypes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -template -inline T* GetTensorData(TfLiteTensor* tensor) { - return tensor != nullptr ? reinterpret_cast(tensor->data.raw) : nullptr; -} - -template -inline const T* GetTensorData(const TfLiteTensor* tensor) { - return tensor != nullptr ? 
reinterpret_cast(tensor->data.raw) - : nullptr; -} - -inline RuntimeShape GetTensorShape(const TfLiteTensor* tensor) { - if (tensor == nullptr) { - return RuntimeShape(); - } - - TfLiteIntArray* dims = tensor->dims; - const int dims_size = dims->size; - const int32_t* dims_data = reinterpret_cast(dims->data); - return RuntimeShape(dims_size, dims_data); -} - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/internal/types.h b/code/components/tflite-lib/tensorflow/lite/kernels/internal/types.h deleted file mode 100644 index c44ba48e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/internal/types.h +++ /dev/null @@ -1,1065 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_ - -#include -#include -#include -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/runtime_shape.h" - -namespace tflite { - -enum class FusedActivationFunctionType : uint8_t { - kNone, - kRelu6, - kRelu1, - kRelu -}; -enum class PaddingType : uint8_t { kNone, kSame, kValid }; - -struct PaddingValues { - int16_t width; - int16_t height; - // offset is used for calculating "remaining" padding, for example, `width` - // is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is - // 1 + 1 = 2. - int16_t width_offset; - // Same as width_offset except it's over the height dimension. - int16_t height_offset; -}; - -struct Padding3DValues { - int16_t width; - int16_t height; - int16_t depth; - // offset is used for calculating "remaining" padding, for example, `width` - // is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is - // 1 + 1 = 2. - int16_t width_offset; - // Same as width_offset except it's over the height dimension. - int16_t height_offset; - // Same as width_offset except it's over the depth dimension. - int16_t depth_offset; -}; - -// This enumeration allows for non-default formats for the weights array -// of a fully-connected operator, allowing the use of special optimized -// runtime paths. -enum class FullyConnectedWeightsFormat : uint8_t { - // Default format (flat 2D layout, the inner contiguous dimension - // is input_depth, the outer non-contiguous dimension is output_depth) - kDefault, - // Summary: optimized layout for fast CPU runtime implementation, - // aimed specifically at ARM CPUs at the moment, and specialized for - // 8-bit quantized layers. 
- // - // The use case we're concerned with here is: 8-bit quantization, - // large weights matrix that doesn't fit in cache (e.g. 4096x2048 in - // a key application that drove this), very small batch size (e.g. 1 -- 4). - // - // Even with 8-bit quantization of weights, the performance of memory - // accesses to the weights can become the dominant issue when - // the batch size is small, so each weight value is used in only a few - // arithmetic ops, i.e. the fully-connected node has a low arithmetic - // intensity. The specific issues that arise are of three kinds: - // (1) One may, ideally, max out DRAM bandwidth, i.e. be truly memory - // bound. That's the "good" issue to run into. - // (2) One may run into sub-optimal pre-fetching: the data hasn't been - // prefetched into the cache by the time we need it. - // (3) One may run into cache aliasing: multiple values that are - // pre-fetched, alias each other in the L1 cache (which typically - // has only 4-way set associativity in ARM CPUs) and thus evict - // each other before we get to using them. - // - // The point of this shuffling is to avoid issues (2) and (3) so that - // we get as fast as possible given only the hard constraint (1). - // This is achieved by turning the difficulty into a solution: the - // difficulty, that each value loaded from memory is used only in - // one kernel iteration, making this operation memory-intensive, hints at - // the solution, of shuffling the weights so that they are stored in the - // exact order as the kernel needs to load them, so that the memory - // accesses made by the kernel are trivial. This solves (2) because the - // trivial memory access pattern allows the CPU's automatic prefetching - // to perform very well (no need even for preload instructions), and this - // solves (3) because the values being loaded concurrently are now - // contiguous in the address space, thus don't alias each other in the cache. 
- // - // On ARM, we typically want our kernel to process a 4x16 block of weights - // at a time, because: - // - 16 is the number of bytes in a NEON register. - // - 4 is how many rows we need to handle concurrently in the kernel in - // order to have sufficient mutual independence of instructions to - // maximize arithmetic throughput. - // - // Finally, the 'Int8' part in the name refers to the fact that this - // weights format has each weights value encoded as a signed int8_t value, - // even if the data type of the weights buffer is uint8_t. This is intended - // to save runtime kernels the effort to have to XOR the top bit of these - // bytes before using them in signed arithmetic, see this file for more - // explanations on the 'signed int8_t trick' in matrix multiplication kernels: - // - // tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc - // - kShuffled4x16Int8, -}; - -// Quantization parameters, determining the mapping of quantized values -// to real values (i.e. determining how quantized values are mathematically -// interpreted). -// -// The correspondence is as follows: -// -// real_value = scale * (quantized_value - zero_point); -// -// In other words, zero_point designates which quantized value corresponds to -// the real 0 value, and scale designates the difference between the real values -// corresponding to consecutive quantized values differing by 1. -struct QuantizationParams { - int32_t zero_point = 0; - double scale = 0.0; -}; - -inline bool operator==(const QuantizationParams& qp1, - const QuantizationParams& qp2) { - return qp1.zero_point == qp2.zero_point && qp1.scale == qp2.scale; -} - -// Quantization parameters for each channel, determining the mapping of -// quantized values to real values. See QuantizationParams for a single set of -// parameters per tensor. This has one parameters set per each channel. 
-// -// The correspondence is as follows: -// -// real_value = scale[channel] * (quantized_value - zero_point[channel]); -// -struct PerChannelQuantizationParams { - // The following members typically point to the corresponding members of a - // TfLiteAffineQuantization struct. - const float* scale; - const int32_t* zero_point; - int32_t quantized_dimension; -}; - -// Gets next index to iterate through a multidimensional array. -inline bool NextIndex(const int num_dims, const int* dims, int* current) { - if (num_dims == 0) { - return false; - } - TFLITE_DCHECK(dims != nullptr); - TFLITE_DCHECK(current != nullptr); - int carry = 1; - for (int idx = num_dims - 1; idx >= 0; --idx) { - int current_val = current[idx] + carry; - TFLITE_DCHECK_GE(dims[idx], current_val); - if (dims[idx] == current_val) { - current[idx] = 0; - } else { - current[idx] = current_val; - carry = 0; - break; - } - } - return (carry == 0); -} - -// Gets offset of index if reducing on axis. When reducing, the flattened offset -// will not change, if the input index changes on the given axis. For example, -// if you have a 3D tensor and you are reducing to 2D by eliminating axis 0, -// then index (0, 1, 2) and index (1, 1, 2) will map to the same flattened -// offset. -// TODO(kanlig): uses Dims to represent dimensions. 
-inline size_t ReducedOutputOffset(const int num_dims, const int* dims, - const int* index, const int num_axis, - const int* axis) { - if (num_dims == 0) { - return 0; - } - TFLITE_DCHECK(dims != nullptr); - TFLITE_DCHECK(index != nullptr); - size_t offset = 0; - for (int idx = 0; idx < num_dims; ++idx) { - // if we need to skip this axis - bool is_axis = false; - if (axis != nullptr) { - for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) { - if (idx == axis[axis_idx]) { - is_axis = true; - break; - } - } - } - if (!is_axis) { - offset = offset * static_cast(dims[idx]) + - static_cast(index[idx]); - } - } - return offset; -} - -// Since tensors with '0' in their shape are valid in TF, these offset functions -// allow that as long as the corresponding index is also 0. It is upto the -// calling ops to ensure that they perform verification checks on tensor shapes -// if they don't support a particular behavior. - -inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) { - TFLITE_DCHECK((i0 == 0 && dims.sizes[0] == 0) || - (i0 >= 0 && i0 < dims.sizes[0])); - TFLITE_DCHECK((i1 == 0 && dims.sizes[1] == 0) || - (i1 >= 0 && i1 < dims.sizes[1])); - TFLITE_DCHECK((i2 == 0 && dims.sizes[2] == 0) || - (i2 >= 0 && i2 < dims.sizes[2])); - TFLITE_DCHECK((i3 == 0 && dims.sizes[3] == 0) || - (i3 >= 0 && i3 < dims.sizes[3])); - return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] + - i3 * dims.strides[3]; -} - -inline int Offset(const Dims<4>& dims, int* index) { - return Offset(dims, index[0], index[1], index[2], index[3]); -} - -// Get array size, DCHECKing that the dim index is in range. -// -// Note that this will be phased out with Dims<4>, since RuntimeShape::Dims() -// already performs this check. -template -int ArraySize(const Dims& array, int index) { - TFLITE_DCHECK(index >= 0 && index < N); - return array.sizes[index]; -} - -// Get common array size, DCHECKing that they all agree. 
-template -int MatchingArraySize(const ArrayType1& array1, int index1, - const ArrayType2& array2, int index2) { - TFLITE_DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); - return ArraySize(array1, index1); -} - -template -int MatchingArraySize(const ArrayType1& array1, int index1, - const ArrayType2& array2, int index2, Args... args) { - TFLITE_DCHECK_EQ(ArraySize(array1, index1), ArraySize(array2, index2)); - return MatchingArraySize(array1, index1, args...); -} - -// Get common shape dim, DCHECKing that they all agree. -inline int MatchingDim(const RuntimeShape& shape1, int index1, - const RuntimeShape& shape2, int index2) { - TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2)); - return std::min(shape1.Dims(index1), shape2.Dims(index2)); -} - -template -int MatchingDim(const RuntimeShape& shape1, int index1, - const RuntimeShape& shape2, int index2, Args... args) { - TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2)); - return MatchingDim(shape1, index1, args...); -} - -// Will be phased out with Dims<4>, replaced by RuntimeShape::FlatSize(). 
-template -inline int FlatSize(const Dims& dims) { - int flat_size = 1; - for (int i = 0; i < N; ++i) { - flat_size *= dims.sizes[i]; - } - return flat_size; -} - -TFLITE_DEPRECATED("Prefer FlatSize.") -inline int RequiredBufferSizeForDims(const Dims<4>& dims) { - return FlatSize(dims); -} - -inline int MatchingElementsSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0) { - const int size_1 = shape.FlatSize(); - const int size_2 = check_shape_0.FlatSize(); - TFLITE_CHECK_EQ(size_1, size_2); - return size_1; -} - -inline int MatchingElementsSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1) { - const int size_1 = shape.FlatSize(); - const int size_2 = check_shape_0.FlatSize(); - const int size_3 = check_shape_1.FlatSize(); - TFLITE_CHECK_EQ(size_1, size_2); - TFLITE_CHECK_EQ(size_2, size_3); - return size_1; -} - -// Flat size calculation, checking that dimensions match with one or more other -// arrays. -inline int MatchingFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0) { - TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount()); - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - return shape.FlatSize(); -} - -inline int MatchingFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1) { - TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount()); - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - return MatchingFlatSize(shape, check_shape_1); -} - -inline int MatchingFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1, - const RuntimeShape& check_shape_2) { - TFLITE_DCHECK_EQ(shape.DimensionsCount(), 
check_shape_0.DimensionsCount()); - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - return MatchingFlatSize(shape, check_shape_1, check_shape_2); -} - -inline int MatchingFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1, - const RuntimeShape& check_shape_2, - const RuntimeShape& check_shape_3) { - TFLITE_DCHECK_EQ(shape.DimensionsCount(), check_shape_0.DimensionsCount()); - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - return MatchingFlatSize(shape, check_shape_1, check_shape_2, check_shape_3); -} - -// Flat size calculation, checking that dimensions match with one or more other -// arrays. -template -inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0) { - for (int i = 0; i < N; ++i) { - TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); - } - return FlatSize(dims); -} - -template -inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, - const Dims& check_dims_1) { - for (int i = 0; i < N; ++i) { - TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); - } - return MatchingFlatSize(dims, check_dims_1); -} - -template -inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, - const Dims& check_dims_1, - const Dims& check_dims_2) { - for (int i = 0; i < N; ++i) { - TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); - } - return MatchingFlatSize(dims, check_dims_1, check_dims_2); -} - -template -inline int MatchingFlatSize(const Dims& dims, const Dims& check_dims_0, - const Dims& check_dims_1, - const Dims& check_dims_2, - const Dims& check_dims_3) { - for (int i = 0; i < N; ++i) { - TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); - } - return MatchingFlatSize(dims, check_dims_1, 
check_dims_2, check_dims_3); -} - -// Flat size calculation, checking if their extended shapes match. -inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0) { - const int shape_dims = shape.DimensionsCount(); - const int check_shape_0_dims = check_shape_0.DimensionsCount(); - const int min_dims = std::min(shape_dims, check_shape_0_dims); - - for (int i = 0; i < min_dims; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i), - check_shape_0.Dims(check_shape_0_dims - 1 - i)); - } - for (int i = min_dims; i < shape_dims; ++i) { - TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i), 1); - } - for (int i = min_dims; i < check_shape_0_dims; ++i) { - TFLITE_DCHECK_EQ(check_shape_0.Dims(check_shape_0_dims - 1 - i), 1); - } - return shape.FlatSize(); -} - -inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1) { - const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0); - TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1), - flat_size); - return flat_size; -} - -inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1, - const RuntimeShape& check_shape_2) { - const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0); - TFLITE_DCHECK_EQ( - MatchingExtendedShapeFlatSize(shape, check_shape_1, check_shape_2), - flat_size); - return flat_size; -} - -inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1, - const RuntimeShape& check_shape_2, - const RuntimeShape& check_shape_3) { - const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0); - TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1, - check_shape_2, check_shape_3), - flat_size); - return flat_size; -} - -// Data is required to be 
contiguous, and so many operators can use either the -// full array flat size or the flat size with one dimension skipped (commonly -// the depth). -template -inline int FlatSizeSkipDim(const Dims& dims, int skip_dim) { - TFLITE_DCHECK(skip_dim >= 0 && skip_dim < N); - int flat_size = 1; - for (int i = 0; i < N; ++i) { - flat_size *= (i == skip_dim) ? 1 : dims.sizes[i]; - } - return flat_size; -} - -// A combination of MatchingFlatSize() and FlatSizeSkipDim(). -template -inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, - const Dims& check_dims_0) { - for (int i = 0; i < N; ++i) { - if (i != skip_dim) { - TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); - } - } - return FlatSizeSkipDim(dims, skip_dim); -} - -template -inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, - const Dims& check_dims_0, - const Dims& check_dims_1) { - for (int i = 0; i < N; ++i) { - if (i != skip_dim) { - TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); - } - } - return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1); -} - -template -inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, - const Dims& check_dims_0, - const Dims& check_dims_1, - const Dims& check_dims_2) { - for (int i = 0; i < N; ++i) { - if (i != skip_dim) { - TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); - } - } - return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1, check_dims_2); -} - -template -inline int MatchingFlatSizeSkipDim(const Dims& dims, int skip_dim, - const Dims& check_dims_0, - const Dims& check_dims_1, - const Dims& check_dims_2, - const Dims& check_dims_3) { - for (int i = 0; i < N; ++i) { - if (i != skip_dim) { - TFLITE_DCHECK_EQ(ArraySize(dims, i), ArraySize(check_dims_0, i)); - } - } - return MatchingFlatSizeSkipDim(dims, skip_dim, check_dims_1, check_dims_2, - check_dims_3); -} - -// Data is required to be contiguous, and so many operators can use either the -// full array flat size or the 
flat size with one dimension skipped (commonly -// the depth). -inline int FlatSizeSkipDim(const RuntimeShape& shape, int skip_dim) { - const int dims_count = shape.DimensionsCount(); - TFLITE_DCHECK(skip_dim >= 0 && skip_dim < dims_count); - const auto* dims_data = shape.DimsData(); - int flat_size = 1; - for (int i = 0; i < dims_count; ++i) { - flat_size *= (i == skip_dim) ? 1 : dims_data[i]; - } - return flat_size; -} - -// A combination of MatchingFlatSize() and FlatSizeSkipDim(). -inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim, - const RuntimeShape& check_shape_0) { - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - if (i != skip_dim) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - } - return FlatSizeSkipDim(shape, skip_dim); -} - -inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1) { - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - if (i != skip_dim) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - } - return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1); -} - -inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1, - const RuntimeShape& check_shape_2) { - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < dims_count; ++i) { - if (i != skip_dim) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - } - return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1, check_shape_2); -} - -inline int MatchingFlatSizeSkipDim(const RuntimeShape& shape, int skip_dim, - const RuntimeShape& check_shape_0, - const RuntimeShape& check_shape_1, - const RuntimeShape& check_shape_2, - const RuntimeShape& check_shape_3) { - const int dims_count = shape.DimensionsCount(); - for (int i = 0; i < 
dims_count; ++i) { - if (i != skip_dim) { - TFLITE_DCHECK_EQ(shape.Dims(i), check_shape_0.Dims(i)); - } - } - return MatchingFlatSizeSkipDim(shape, skip_dim, check_shape_1, check_shape_2, - check_shape_3); -} - -template -bool IsPackedWithoutStrides(const Dims& dims) { - int expected_stride = 1; - for (int d = 0; d < N; d++) { - if (dims.strides[d] != expected_stride) return false; - expected_stride *= dims.sizes[d]; - } - return true; -} - -template -void ComputeStrides(Dims* dims) { - dims->strides[0] = 1; - for (int d = 1; d < N; d++) { - dims->strides[d] = dims->strides[d - 1] * dims->sizes[d - 1]; - } -} - -enum class BroadcastableOpCategory : uint8_t { - kNone, - kNonBroadcast, // Matching input shapes. - kFirstInputBroadcastsFast, // Fivefold nested loops. - kSecondInputBroadcastsFast, // Fivefold nested loops. - kGenericBroadcast, // Fall-back. -}; - -struct MinMax { - float min; - float max; -}; -static_assert(sizeof(MinMax) == 8, ""); - -struct ActivationParams { - FusedActivationFunctionType activation_type; - // uint8_t, etc, activation params. - int32_t quantized_activation_min; - int32_t quantized_activation_max; -}; - -struct ReluParams : public ActivationParams { - int32_t input_offset; - int32_t output_offset; - int32_t output_multiplier; - int output_shift; -}; - -// Styles of resizing op usages. For example, kImageStyle can be used with a Pad -// op for pattern-specific optimization. -enum class ResizingCategory : uint8_t { - kNone, - kImageStyle, // 4D, operating on inner dimensions, say {0, a, b, 0}. - kGenericResize, -}; - -// For Add, Sub, Mul ops. -struct ArithmeticParams { - // Shape dependent / common to data / op types. - BroadcastableOpCategory broadcast_category; - // uint8_t inference params. - int32_t input1_offset; - int32_t input2_offset; - int32_t output_offset; - int32_t output_multiplier; - int output_shift; - // Add / Sub, not Mul, uint8_t inference params. 
- int left_shift; - int32_t input1_multiplier; - int input1_shift; - int32_t input2_multiplier; - int input2_shift; - - // TODO(b/158622529): Union the following activation params. - // uint8_t, etc, activation params. - int32_t quantized_activation_min; - int32_t quantized_activation_max; - // float activation params. - float float_activation_min; - float float_activation_max; - // int64_t activation params. - int64_t int64_activation_min; - int64_t int64_activation_max; - - // Processed output dimensions. - // Let input "a" be the one that broadcasts in the faster-changing dimension. - // Then, after coalescing, for shapes {a0, a1, a2, a3, a4} and - // {b0, b1, b2, b3, b4}, - // broadcast_shape[4] = b0 = a0. - // broadcast_shape[3] = b1; a1 = 1. - // broadcast_shape[2] = b2 = a2. - // broadcast_shape[1] = a3; b3 = 1. - // broadcast_shape[0] = b4 = a4. - int broadcast_shape[5]; -}; - -struct ConcatenationParams { - int8_t axis; - const int32_t* input_zeropoint; - const float* input_scale; - uint16_t inputs_count; - int32_t output_zeropoint; - float output_scale; -}; - -struct ComparisonParams { - // uint8_t inference params. - int left_shift; - int32_t input1_offset; - int32_t input1_multiplier; - int input1_shift; - int32_t input2_offset; - int32_t input2_multiplier; - int input2_shift; - // Shape dependent / common to inference types. - bool is_broadcast; -}; - -struct ConvParams { - PaddingType padding_type; - PaddingValues padding_values; - // TODO(starka): This was just "stride", so check that width+height is OK. - int16_t stride_width; - int16_t stride_height; - int16_t dilation_width_factor; - int16_t dilation_height_factor; - // uint8_t inference params. - // TODO(b/65838351): Use smaller types if appropriate. - int32_t input_offset; - int32_t weights_offset; - int32_t output_offset; - int32_t output_multiplier; - int output_shift; - // uint8_t, etc, activation params. 
- int32_t quantized_activation_min; - int32_t quantized_activation_max; - // float activation params. - float float_activation_min; - float float_activation_max; -}; - -struct Conv3DParams { - Padding3DValues padding_values; - int stride_width; - int stride_height; - int stride_depth; - int dilation_width; - int dilation_height; - int dilation_depth; - // float activation params. - float float_activation_min; - float float_activation_max; -}; - -typedef Conv3DParams Conv3DTransposeParams; - -struct DepthToSpaceParams { - int32_t block_size; -}; - -struct DepthwiseParams { - PaddingType padding_type; - PaddingValues padding_values; - int16_t stride_width; - int16_t stride_height; - int16_t dilation_width_factor; - int16_t dilation_height_factor; - int16_t depth_multiplier; - // uint8_t inference params. - // TODO(b/65838351): Use smaller types if appropriate. - int32_t input_offset; - int32_t weights_offset; - int32_t output_offset; - int32_t output_multiplier; - int output_shift; - // uint8_t, etc, activation params. - int32_t quantized_activation_min; - int32_t quantized_activation_max; - // float activation params. - float float_activation_min; - float float_activation_max; - const int32_t* output_multiplier_per_channel; - const int32_t* output_shift_per_channel; -}; - -struct DequantizationParams { - double scale; - int32_t zero_point; -}; - -struct PerChannelDequantizationParams { - const float* scale; - const int32_t* zero_point; - int32_t quantized_dimension; -}; - -struct FakeQuantParams { - MinMax minmax; - int32_t num_bits; -}; - -struct FullyConnectedParams { - // uint8_t inference params. - // TODO(b/65838351): Use smaller types if appropriate. - int32_t input_offset; - int32_t weights_offset; - int32_t output_offset; - int32_t output_multiplier; - int output_shift; - // uint8_t, etc, activation params. - int32_t quantized_activation_min; - int32_t quantized_activation_max; - // float activation params. 
- float float_activation_min; - float float_activation_max; - // Mark the operands as cacheable if they are unchanging, e.g. weights. - bool lhs_cacheable; - bool rhs_cacheable; - FullyConnectedWeightsFormat weights_format; -}; - -struct GatherParams { - int16_t axis; - int16_t batch_dims; -}; - -struct L2NormalizationParams { - // uint8_t inference params. - int32_t input_zero_point; -}; - -struct LocalResponseNormalizationParams { - int32_t range; - double bias; - double alpha; - double beta; -}; - -struct HardSwishParams { - // zero_point of the input activations. - int16_t input_zero_point; - // zero_point of the output activations. - int16_t output_zero_point; - // 16bit fixed-point component of the multiplier to apply to go from the - // "high-res input scale", which is the input scale multiplied by 2^7, to the - // "relu-ish scale", which 3.0/32768. - // See the implementation of HardSwishPrepare. - int16_t reluish_multiplier_fixedpoint_int16; - // exponent/bit-shift component of the aforementioned multiplier. - int reluish_multiplier_exponent; - // 16bit fixed-point component of the multiplier to apply to go from the - // "high-res input scale", which is the input scale multiplied by 2^7, to the - // output scale. - // See the implementation of HardSwishPrepare. - int16_t output_multiplier_fixedpoint_int16; - // exponent/bit-shift component of the aforementioned multiplier. - int output_multiplier_exponent; -}; - -struct LogisticParams { - // uint8_t inference params. 
- int32_t input_zero_point; - int32_t input_range_radius; - int32_t input_multiplier; - int input_left_shift; -}; - -struct LstmCellParams { - int32_t weights_zero_point; - int32_t accum_multiplier; - int accum_shift; - int state_integer_bits; -}; - -struct MeanParams { - int8_t axis_count; - int16_t axis[4]; -}; - -struct PackParams { - int8_t axis; - const int32_t* input_zeropoint; - const float* input_scale; - uint16_t inputs_count; - int32_t output_zeropoint; - float output_scale; -}; - -struct PadParams { - int8_t left_padding_count; - int32_t left_padding[5]; - int8_t right_padding_count; - int32_t right_padding[5]; - ResizingCategory resizing_category; -}; - -struct PreluParams { - int32_t input_offset; - int32_t alpha_offset; - int32_t output_offset; - int32_t output_multiplier_1; - int output_shift_1; - int32_t output_multiplier_2; - int output_shift_2; -}; - -struct PoolParams { - FusedActivationFunctionType activation; - PaddingType padding_type; - PaddingValues padding_values; - int stride_height; - int stride_width; - int filter_height; - int filter_width; - // uint8_t, etc, activation params. - int32_t quantized_activation_min; - int32_t quantized_activation_max; - // float activation params. - float float_activation_min; - float float_activation_max; -}; - -struct ReshapeParams { - int8_t shape_count; - int32_t shape[4]; -}; - -struct ResizeBilinearParams { - bool align_corners; - // half_pixel_centers assumes pixels are of half the actual dimensions, and - // yields more accurate resizes. Corresponds to the same argument for the - // original TensorFlow op in TF2.0. - bool half_pixel_centers; -}; - -struct ResizeNearestNeighborParams { - bool align_corners; - bool half_pixel_centers; -}; - -struct SliceParams { - int8_t begin_count; - int32_t begin[5]; - int8_t size_count; - int32_t size[5]; -}; - -struct SoftmaxParams { - // beta is not really used (not a Tensorflow parameter) and not implemented - // for LogSoftmax. 
- double beta; - // uint8_t inference params. Used even when beta defaults to 1.0. - int32_t input_multiplier; - int32_t input_left_shift; - // Reverse scaling is only used by LogSoftmax. - int32_t reverse_scaling_divisor; - int32_t reverse_scaling_right_shift; - int diff_min; - int32_t zero_point; - float scale; - float* table; - // int16 LUT for exp(x), where x uniform distributed between [-10.0 , 0.0] - int16_t* exp_lut; - // int16 LUT for 1 / (1 + x), where x uniform distributed between [0.0 , 1.0] - int16_t* one_over_one_plus_x_lut; - uint8_t* uint8_table1; - uint8_t* uint8_table2; -}; - -struct SpaceToBatchParams { - // "Zero" padding for uint8_t means padding with the output offset. - int32_t output_offset; -}; - -struct SpaceToDepthParams { - int32_t block_size; -}; - -struct SplitParams { - // Graphs that split into, say, 2000 nodes are encountered. The indices in - // OperatorEdges are of type uint16_t. - uint16_t num_split; - int16_t axis; -}; - -struct SqueezeParams { - int8_t squeeze_dims_count; - int32_t squeeze_dims[4]; -}; - -struct StridedSliceParams { - int8_t start_indices_count; - int32_t start_indices[5]; - int8_t stop_indices_count; - int32_t stop_indices[5]; - int8_t strides_count; - int32_t strides[5]; - - uint16_t begin_mask; - uint16_t ellipsis_mask; - uint16_t end_mask; - uint16_t new_axis_mask; - uint16_t shrink_axis_mask; -}; - -struct TanhParams { - int32_t input_zero_point; - int32_t input_range_radius; - int32_t input_multiplier; - int input_left_shift; -}; - -struct TransposeParams { - int8_t perm_count; - int32_t perm[5]; -}; - -struct UnpackParams { - uint16_t num_split; - int16_t axis; -}; - -struct LeakyReluParams { - float alpha; - int32_t input_offset; - int32_t output_offset; - int32_t output_multiplier_alpha; - int32_t output_shift_alpha; - int32_t output_multiplier_identity; - int32_t output_shift_identity; -}; - -template -inline void SetActivationParams(float min, float max, P* params) { - params->float_activation_min = 
min; - params->float_activation_max = max; -} - -template -inline void SetActivationParams(int32_t min, int32_t max, P* params) { - params->quantized_activation_min = min; - params->quantized_activation_max = max; -} - -template -inline void SetActivationParams(int64_t min, int64_t max, P* params) { - params->int64_activation_min = min; - params->int64_activation_max = max; -} - -template -inline void GetActivationParams(const P& params, int32_t* min, int32_t* max) { - *min = params.quantized_activation_min; - *max = params.quantized_activation_max; -} - -template -inline void GetActivationParams(const P& params, float* min, float* max) { - *min = params.float_activation_min; - *max = params.float_activation_max; -} - -template -inline void GetActivationParams(const P& params, int64_t* min, int64_t* max) { - *min = params.int64_activation_min; - *max = params.int64_activation_max; -} - -// Type trait to check of given type has size smaller than 4 bytes. -template -struct is_small_integer - : public std::integral_constant::value || - std::is_same::value || - std::is_same::value || - std::is_same::value> {}; - -// Type trait to check of given type is int32 or int64. -template -struct is_int32_or_int64 - : public std::integral_constant::value || - std::is_same::value> { -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/kernel_util.cc b/code/components/tflite-lib/tensorflow/lite/kernels/kernel_util.cc deleted file mode 100644 index 10b37ed3..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/kernel_util.cc +++ /dev/null @@ -1,593 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/kernels/kernel_util.h" - -#include -#include - -#include -#include -#include -#include -#ifndef TF_LITE_STATIC_MEMORY -#include -#endif // TF_LITE_STATIC_MEMORY - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/context_util.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" - -#if defined(__APPLE__) -#include "TargetConditionals.h" -#endif - -namespace tflite { - -namespace { - -// Assumes tensor_index is a valid index (in bounds) -inline TfLiteTensor* GetTensorAtIndex(const TfLiteContext* context, - int tensor_index) { - if (context->tensors != nullptr) { - return &context->tensors[tensor_index]; - } else { - return context->GetTensor(context, tensor_index); - } -} - -// Validate in a single place to reduce binary size -inline TfLiteStatus ValidateTensorIndexingSafe(const TfLiteContext* context, - int index, int max_size, - const int* tensor_indices, - int* tensor_index) { - if (index < 0 || index >= max_size) { - TF_LITE_KERNEL_LOG(const_cast(context), - "Invalid tensor index %d (not in [0, %d))\n", index, - max_size); - return kTfLiteError; - } - if (tensor_indices[index] == kTfLiteOptionalTensor) { - TF_LITE_KERNEL_LOG(const_cast(context), - "Tensor at index %d was optional but was expected\n", - index); - return kTfLiteError; - } - - *tensor_index = tensor_indices[index]; - return kTfLiteOk; -} - -// Same as above but 
returns -1 for invalid inputs instead of status + logging -// error. -inline int ValidateTensorIndexing(const TfLiteContext* context, int index, - int max_size, const int* tensor_indices) { - if (index >= 0 && index < max_size) { - const int tensor_index = tensor_indices[index]; - if (tensor_index != kTfLiteOptionalTensor) { - return tensor_index; - } - } - return -1; -} - -inline TfLiteTensor* GetMutableInput(const TfLiteContext* context, - const TfLiteNode* node, int index) { - const int tensor_index = ValidateTensorIndexing( - context, index, node->inputs->size, node->inputs->data); - if (tensor_index < 0) { - return nullptr; - } - return GetTensorAtIndex(context, tensor_index); -} - -inline TfLiteStatus GetMutableInputSafe(const TfLiteContext* context, - const TfLiteNode* node, int index, - const TfLiteTensor** tensor) { - int tensor_index; - TF_LITE_ENSURE_OK( - context, ValidateTensorIndexingSafe(context, index, node->inputs->size, - node->inputs->data, &tensor_index)); - *tensor = GetTensorAtIndex(context, tensor_index); - return kTfLiteOk; -} - -} // anonymous namespace. - -const TfLiteTensor* GetInput(const TfLiteContext* context, - const TfLiteNode* node, int index) { - return GetMutableInput(context, node, index); -} - -TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node, - int index, const TfLiteTensor** tensor) { - return GetMutableInputSafe(context, node, index, tensor); -} - -TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, - int index) { - TfLiteTensor* tensor = GetMutableInput(context, node, index); - if (tensor == nullptr) return nullptr; - return tensor->is_variable ? 
tensor : nullptr; -} - -TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, - int index) { - const int tensor_index = ValidateTensorIndexing( - context, index, node->outputs->size, node->outputs->data); - if (tensor_index < 0) { - return nullptr; - } - return GetTensorAtIndex(context, tensor_index); -} - -TfLiteStatus GetOutputSafe(const TfLiteContext* context, const TfLiteNode* node, - int index, TfLiteTensor** tensor) { - int tensor_index; - TF_LITE_ENSURE_OK( - context, ValidateTensorIndexingSafe(context, index, node->outputs->size, - node->outputs->data, &tensor_index)); - *tensor = GetTensorAtIndex(context, tensor_index); - return kTfLiteOk; -} - -const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, - const TfLiteNode* node, int index) { - return GetInput(context, node, index); -} - -#ifndef TF_LITE_STATIC_MEMORY -TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node, - int index) { - const int tensor_index = ValidateTensorIndexing( - context, index, node->temporaries->size, node->temporaries->data); - if (tensor_index < 0) { - return nullptr; - } - return GetTensorAtIndex(context, tensor_index); -} - -TfLiteStatus GetTemporarySafe(const TfLiteContext* context, - const TfLiteNode* node, int index, - TfLiteTensor** tensor) { - int tensor_index; - TF_LITE_ENSURE_OK(context, ValidateTensorIndexingSafe( - context, index, node->temporaries->size, - node->temporaries->data, &tensor_index)); - *tensor = GetTensorAtIndex(context, tensor_index); - return kTfLiteOk; -} - -const TfLiteTensor* GetIntermediates(TfLiteContext* context, - const TfLiteNode* node, int index) { - const int tensor_index = ValidateTensorIndexing( - context, index, node->intermediates->size, node->intermediates->data); - if (tensor_index < 0) { - return nullptr; - } - return GetTensorAtIndex(context, tensor_index); -} - -TfLiteStatus GetIntermediatesSafe(const TfLiteContext* context, - const TfLiteNode* node, int index, - TfLiteTensor** 
tensor) { - int tensor_index; - TF_LITE_ENSURE_OK(context, ValidateTensorIndexingSafe( - context, index, node->intermediates->size, - node->intermediates->data, &tensor_index)); - *tensor = GetTensorAtIndex(context, tensor_index); - return kTfLiteOk; -} -#endif // TF_LITE_STATIC_MEMORY - -// Per-axis -TfLiteStatus PopulateConvolutionQuantizationParams( - TfLiteContext* context, const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, - const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, - int32_t* output_activation_min, int32_t* output_activation_max, - int32_t* per_channel_multiplier, int32_t* per_channel_shift) { - const auto* affine_quantization = - reinterpret_cast(filter->quantization.params); - return PopulateConvolutionQuantizationParams( - context, input, filter, bias, output, activation, multiplier, shift, - output_activation_min, output_activation_max, per_channel_multiplier, - per_channel_shift, affine_quantization->scale->size); -} - -// Per-axis & per-tensor -TfLiteStatus PopulateConvolutionQuantizationParams( - TfLiteContext* context, const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, - const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, - int32_t* output_activation_min, int32_t* output_activation_max, - int32_t* per_channel_multiplier, int32_t* per_channel_shift, - int num_channels) { - TF_LITE_ENSURE_EQ(context, input->quantization.type, - kTfLiteAffineQuantization); - TF_LITE_ENSURE_EQ(context, filter->quantization.type, - kTfLiteAffineQuantization); - // TODO(jianlijianli): Enable bias type check and bias scale == input scale - // * filter scale for each channel in affine quantization once bias - // quantization is properly populated. - // TF_LITE_ENSURE_EQ(context, bias->quantization.type, - // kTfLiteAffineQuantization); - - // Check data type. 
- const auto* affine_quantization = - reinterpret_cast(filter->quantization.params); - TF_LITE_ENSURE(context, affine_quantization); - TF_LITE_ENSURE(context, affine_quantization->scale); - const bool is_per_channel = affine_quantization->scale->size > 1; - if (is_per_channel) { - // Currently only Int8/Int16 is supported for per channel quantization. - TF_LITE_ENSURE(context, - input->type == kTfLiteInt8 || input->type == kTfLiteInt16); - TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8); - TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels); - TF_LITE_ENSURE_EQ( - context, num_channels, - filter->dims->data[affine_quantization->quantized_dimension]); - } - - // Populate multiplier and shift using affine quantization. - const float input_scale = input->params.scale; - const float output_scale = output->params.scale; - const float* filter_scales = affine_quantization->scale->data; - for (int i = 0; i < num_channels; ++i) { - // If per-tensor quantization parameter is specified, broadcast it along the - // quantization dimension (channels_out). - const float scale = is_per_channel ? filter_scales[i] : filter_scales[0]; - const double filter_scale = static_cast(scale); - const double effective_output_scale = static_cast(input_scale) * - filter_scale / - static_cast(output_scale); - int32_t significand; - int channel_shift; - QuantizeMultiplier(effective_output_scale, &significand, &channel_shift); - per_channel_multiplier[i] = significand; - per_channel_shift[i] = channel_shift; - } - - // Populate scalar quantization parameters. - // This check on legacy quantization parameters is kept only for backward - // compatibility. - if (input->type == kTfLiteUInt8) { - // Check bias scale == input scale * filter scale. - double real_multiplier = 0.0; - TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( - context, input, filter, bias, output, &real_multiplier)); - int exponent; - - // Populate quantization parameters with multiplier and shift. 
- QuantizeMultiplier(real_multiplier, multiplier, &exponent); - *shift = -exponent; - } - if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 || - input->type == kTfLiteInt16) { - TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( - context, activation, output, output_activation_min, - output_activation_max)); - } - return kTfLiteOk; -} - -TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, - const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, - TfLiteTensor* output, - double* multiplier) { - const double input_product_scale = static_cast(input->params.scale) * - static_cast(filter->params.scale); - // The following conditions must be guaranteed by the training pipeline. - if (bias) { - const double bias_scale = static_cast(bias->params.scale); - // Here we're making sure the input_product_scale & bias_scale are about the - // same. Since we have: - // (output - output_zp) * output_scale = - // input_product_scale * input_product + bias * bias_scale ---- (0) - // - // (0) equals: - // (input_product + bias) * input_product_scale ----- (1) - // + - // bias * (bias_scale - input_product_scale) ------ (2) - // - // For the real kernel computation, we're doing (1), so we really need to - // make sure (2) has minimum impact on the output, so: - // bias * (bias_scale - input_product_scale) / output_scale should be - // a small number for an integer. - // Since normally bias should be within a small range. - // We should expect (bias_scale - input_product_scale) / output_scale to - // be a small number like 0.02. 
- const double scale_diff = std::abs(input_product_scale - bias_scale); - const double output_scale = static_cast(output->params.scale); - - TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02); - } - return GetQuantizedConvolutionMultipler(context, input, filter, output, - multiplier); -} - -TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, - const TfLiteTensor* input, - const TfLiteTensor* filter, - TfLiteTensor* output, - double* multiplier) { - const double input_product_scale = - static_cast(input->params.scale * filter->params.scale); - TF_LITE_ENSURE(context, input_product_scale >= 0); - *multiplier = input_product_scale / static_cast(output->params.scale); - - return kTfLiteOk; -} - -namespace { - -inline TfLiteStatus Quantize(TfLiteContext* context, float scale, - int32_t zero_point, float f, int32_t& q) { - const float tmp = TfLiteRound(f / scale); - const bool no_integer_overflow_from_quantization = - (tmp >= static_cast(std::numeric_limits::min()) && - tmp <= static_cast(std::numeric_limits::max())); - TF_LITE_ENSURE(context, no_integer_overflow_from_quantization); - q = zero_point + static_cast(tmp); - return kTfLiteOk; -} - -TfLiteStatus CalculateActivationRangeQuantizedImpl( - TfLiteContext* context, TfLiteFusedActivation activation, int32_t qmin, - int32_t qmax, TfLiteTensor* output, int32_t* act_min, int32_t* act_max) { - const auto scale = output->params.scale; - const auto zero_point = output->params.zero_point; - - int32_t tmp_q; - if (activation == kTfLiteActRelu) { - TF_LITE_ENSURE_OK(context, - Quantize(context, scale, zero_point, 0.0, tmp_q)); - *act_min = std::max(qmin, tmp_q); - *act_max = qmax; - } else if (activation == kTfLiteActRelu6) { - TF_LITE_ENSURE_OK(context, - Quantize(context, scale, zero_point, 0.0, tmp_q)); - *act_min = std::max(qmin, tmp_q); - TF_LITE_ENSURE_OK(context, - Quantize(context, scale, zero_point, 6.0, tmp_q)); - *act_max = std::min(qmax, tmp_q); - } else if (activation == 
kTfLiteActReluN1To1) { - TF_LITE_ENSURE_OK(context, - Quantize(context, scale, zero_point, -1.0, tmp_q)); - *act_min = std::max(qmin, tmp_q); - TF_LITE_ENSURE_OK(context, - Quantize(context, scale, zero_point, 1.0, tmp_q)); - *act_max = std::min(qmax, tmp_q); - } else { - *act_min = qmin; - *act_max = qmax; - } - return kTfLiteOk; -} -} // namespace - -TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context, - TfLiteFusedActivation activation, - TfLiteTensor* output, - int32_t* act_min, - int32_t* act_max) { - int32_t qmin = 0; - int32_t qmax = 0; - if (output->type == kTfLiteUInt8) { - qmin = std::numeric_limits::min(); - qmax = std::numeric_limits::max(); - } else if (output->type == kTfLiteInt8) { - qmin = std::numeric_limits::min(); - qmax = std::numeric_limits::max(); - } else if (output->type == kTfLiteInt16) { - qmin = std::numeric_limits::min(); - qmax = std::numeric_limits::max(); - } else { - TF_LITE_ENSURE(context, false); - } - - return CalculateActivationRangeQuantizedImpl(context, activation, qmin, qmax, - output, act_min, act_max); -} - -bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) { - return TfLiteIntArrayEqual(input1->dims, input2->dims); -} - -#ifndef TF_LITE_STATIC_MEMORY -TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context, - const TfLiteTensor* input, - TfLiteIntArray** output_shape) { - if (NumDimensions(input) != 1) { - TF_LITE_KERNEL_LOG(const_cast(context), - "Invalid %dD input tensor (must be a 1D tensor).", - NumDimensions(input)); - return kTfLiteError; - } - const int output_dims = SizeOfDimension(input, 0); - std::unique_ptr shape( - TfLiteIntArrayCreate(output_dims), TfLiteIntArrayFree); - for (int i = 0; i < output_dims; i++) { - shape->data[i] = input->data.i32[i]; - } - *output_shape = shape.release(); - return kTfLiteOk; -} - -// TODO(b/172067338): Having this function be part of TF_LITE_STATIC_MEMORY -// build results in a 6KB size increase, even though the function is unsused 
for -// that build. What appears to be happening is that while the linker drops the -// unsused function, the string library that gets pulled in is not dropped, -// resulting in the increased binary size. -const std::string GetShapeDebugString(const TfLiteIntArray* shape) { - std::string str; - for (int d = 0; d < shape->size; ++d) { - if (str.empty()) - str = "[" + std::to_string(shape->data[d]); - else - // Don't add space after "," to make the output consistent with - // tensorflow::shape_inference::InferenceContext::DebugString() - str += "," + std::to_string(shape->data[d]); - } - if (str.empty()) { - str = "[]"; - } else { - str += "]"; - } - return str; -} - -TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteIntArray** output_shape) { - const int dims1 = NumDimensions(input1); - const int dims2 = NumDimensions(input2); - const int out_dims = std::max(dims1, dims2); - - std::unique_ptr shape( - TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree); - for (int i = 0; i < out_dims; ++i) { - const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1); - const int d2 = i >= dims2 ? 
1 : SizeOfDimension(input2, dims2 - i - 1); - if (!(d1 == d2 || d1 == 1 || d2 == 1)) { - TF_LITE_KERNEL_LOG(context, - "Given shapes, %s and %s, are not broadcastable.", - GetShapeDebugString(input1->dims).c_str(), - GetShapeDebugString(input2->dims).c_str()); - return kTfLiteError; - } - - if (d1 == 0 || d2 == 0) { - shape->data[out_dims - i - 1] = 0; - } else { - shape->data[out_dims - i - 1] = std::max(d1, d2); - } - } - *output_shape = shape.release(); - return kTfLiteOk; -} - -TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - const TfLiteTensor* input3, - TfLiteIntArray** output_shape) { - const int dims1 = NumDimensions(input1); - const int dims2 = NumDimensions(input2); - const int dims3 = NumDimensions(input3); - const int out_dims = std::max(std::max(dims1, dims2), dims3); - std::unique_ptr shape( - TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree); - for (int i = 0; i < out_dims; ++i) { - const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1); - const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1); - const int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1); - const int min_value = std::min(std::min(d1, d2), d3); - int max_value = std::max(std::max(d1, d2), d3); - // If one dimention is 0, others must be 0 or 1. - if (min_value == 0) max_value = 0; - if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) || - !(d3 == 1 || d3 == max_value)) { - TF_LITE_KERNEL_LOG(context, - "Given shapes, %s, %s and %s, are not broadcastable.", - GetShapeDebugString(input1->dims).c_str(), - GetShapeDebugString(input2->dims).c_str(), - GetShapeDebugString(input3->dims).c_str()); - return kTfLiteError; - } - shape->data[out_dims - i - 1] = max_value; - } - *output_shape = shape.release(); - return kTfLiteOk; -} -#endif // TF_LITE_STATIC_MEMORY - -// Size of string is not constant, return 0 in such case. 
-int TfLiteTypeGetSize(TfLiteType type) { - switch (type) { - case kTfLiteUInt8: - static_assert(sizeof(uint8_t) == 1, ""); - return 1; - case kTfLiteInt8: - static_assert(sizeof(int8_t) == 1, ""); - return 1; - case kTfLiteBool: - return sizeof(bool); - case kTfLiteUInt16: - static_assert(sizeof(uint16_t) == 2, ""); - return 2; - case kTfLiteInt16: - static_assert(sizeof(int16_t) == 2, ""); - return 2; - case kTfLiteFloat16: - static_assert(sizeof(int16_t) == 2, ""); - return 2; - case kTfLiteFloat32: - static_assert(sizeof(float) == 4, ""); - return 4; - case kTfLiteInt32: - static_assert(sizeof(int32_t) == 4, ""); - return 4; - case kTfLiteUInt32: - static_assert(sizeof(uint32_t) == 4, ""); - return 4; - case kTfLiteInt64: - static_assert(sizeof(int64_t) == 8, ""); - return 8; - case kTfLiteUInt64: - static_assert(sizeof(uint64_t) == 8, ""); - return 8; - case kTfLiteFloat64: - static_assert(sizeof(double) == 8, ""); - return 8; - case kTfLiteComplex64: - static_assert(sizeof(std::complex) == 8, ""); - return 8; - case kTfLiteComplex128: - static_assert(sizeof(std::complex) == 16, ""); - return 16; - default: - return 0; - } -} - -bool IsMobilePlatform() { -#if defined(ANDROID) || defined(__ANDROID__) - return true; -#elif defined(__APPLE__) -#if TARGET_IPHONE_SIMULATOR || TARGET_OS_IPHONE - return true; -#endif -#endif - return false; -} - -bool HasUnspecifiedDimension(const TfLiteTensor* tensor) { -#ifndef TF_LITE_STATIC_MEMORY - if (tensor->dims_signature) { - for (int i : TfLiteIntArrayView(tensor->dims_signature)) { - if (i == -1) return true; - } - } -#endif // TF_LITE_STATIC_MEMORY - return false; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/kernel_util.h b/code/components/tflite-lib/tensorflow/lite/kernels/kernel_util.h deleted file mode 100644 index 06874422..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/kernel_util.h +++ /dev/null @@ -1,330 +0,0 @@ -/* Copyright 2017 The TensorFlow 
Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_ -#define TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_ - -#include - -#include -#ifndef TF_LITE_STATIC_MEMORY -#include -#endif // TF_LITE_STATIC_MEMORY - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -// A fair number of functions in this header have historically been inline. -// It is ok to change functions to not be inline if the latency with -// benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is -// made, move the newly non-inlined function declarations to the top of this -// header file. - -// Note: You must check if result is not null: -// -// TfLiteTensor* my_tensor = GetInput(context, node, kMyTensorIdx); -// TF_LITE_ENSURE(context, my_tensor != nullptr); -// -// This is because the index might point to the optional tensor constant -// (kTfLiteOptionalTensor) in which case there is no tensor to return. -const TfLiteTensor* GetInput(const TfLiteContext* context, - const TfLiteNode* node, int index); - -// Same as `GetInput` but returns boolean and uses output argument for tensor. 
-// -// TfLiteTensor* my_tensor; -// TF_LITE_ENSURE_OK(context, -// GetInputSafe(context, node, kMyTensorIdx, &my_tensor)); -// // can use my_tensor directly from here onwards, it is not nullptr -// -// Should be used in cases where the binary size is too large. -TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node, - int index, const TfLiteTensor** tensor); - -// Note: You must check if result is not null: -// -// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx); -// TF_LITE_ENSURE(context, my_tensor != nullptr); -// -// This is because the index might point to the optional tensor constant -// (kTfLiteOptionalTensor) in which case there is no tensor to return. -TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, - int index); - -// Note: You must check if result is not null: -// -// TfLiteTensor* my_tensor = GetOutput(context, node, kMyTensorIdx); -// TF_LITE_ENSURE(context, my_tensor != nullptr); -// -// This is because the index might point to the optional tensor constant -// (kTfLiteOptionalTensor) in which case there is no tensor to return. -TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, - int index); - -// Same as `GetOutput` but returns boolean and uses output argument for tensor. -// -// TfLiteTensor* my_tensor; -// TF_LITE_ENSURE_OK(context, -// GetOutputSafe(context, node, kMyTensorIdx, &my_tensor)); -// // can use my_tensor directly from here onwards, it is not nullptr -// -// Should be used in cases where the binary size is too large. 
-TfLiteStatus GetOutputSafe(const TfLiteContext* context, const TfLiteNode* node, - int index, TfLiteTensor** tensor); - -// Note: You must check if result is not null: -// -// TfLiteTensor* my_tensor = GetOptionalInputTensor(context, node, kIdx); -// TF_LITE_ENSURE(context, my_tensor != nullptr); -// -// This is because the index might point to the optional tensor constant -// (kTfLiteOptionalTensor) in which case there is no tensor to return. -// -// Deprecated. GetInput has the same functionality. -const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, - const TfLiteNode* node, int index); - -#ifndef TF_LITE_STATIC_MEMORY -// Note: You must check if result is not null: -// -// TfLiteTensor* my_tensor = GetTemporary(context, node, kMyTensorIdx); -// TF_LITE_ENSURE(context, my_tensor != nullptr); -// -// This is because the index might point to the optional tensor constant -// (kTfLiteOptionalTensor) in which case there is no tensor to return. -TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node, - int index); - -// Same as `GetTemporary` but returns boolean and uses output argument for -// tensor. -// -// TfLiteTensor* my_tensor; -// TF_LITE_ENSURE_OK(context, -// GetTemporarySafe(context, node, kMyTensorIdx, -// &my_tensor)); -// // can use my_tensor directly from here onwards, it is not nullptr -// -// Should be used in cases where the binary size is too large. -TfLiteStatus GetTemporarySafe(const TfLiteContext* context, - const TfLiteNode* node, int index, - TfLiteTensor** tensor); - -// Note: You must check if result is not null: -// -// TfLiteTensor* my_tensor = GetIntermediates(context, node, kMyTensorIdx); -// TF_LITE_ENSURE(context, my_tensor != nullptr); -// -// This is because the index might point to the optional tensor constant -// (kTfLiteOptionalTensor) in which case there is no tensor to return. 
-const TfLiteTensor* GetIntermediates(TfLiteContext* context, - const TfLiteNode* node, int index); - -// Same as `GetIntermediates` but returns boolean and uses output argument for -// tensor. -// -// TfLiteTensor* my_tensor; -// TF_LITE_ENSURE_OK(context, -// GetIntermediatesSafe(context, node, kMyTensorIdx, -// &my_tensor)); -// // can use my_tensor directly from here onwards, it is not nullptr -// -// Should be used in cases where the binary size is too large. -TfLiteStatus GetIntermediatesSafe(const TfLiteContext* context, - const TfLiteNode* node, int index, - TfLiteTensor** tensor); -#endif // TF_LITE_STATIC_MEMORY - -inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; } -inline int SizeOfDimension(const TfLiteTensor* t, int dim) { - return t->dims->data[dim]; -} - -inline int NumInputs(const TfLiteNode* node) { - return node->inputs == nullptr ? 0 : node->inputs->size; -} -inline int NumOutputs(const TfLiteNode* node) { - return node->outputs == nullptr ? 0 : node->outputs->size; -} - -#ifndef TF_LITE_STATIC_MEMORY -inline int NumIntermediates(const TfLiteNode* node) { - return node->intermediates->size; -} -#endif // TF_LITE_STATIC_MEMORY - -inline int64_t NumElements(const TfLiteIntArray* dims) { - int64_t count = 1; - for (int i = 0; i < dims->size; ++i) { - count *= dims->data[i]; - } - return count; -} - -inline int64_t NumElements(const TfLiteTensor* t) { - return NumElements(t->dims); -} - -inline int64_t NumElements(const int* dims, int num_dims) { - int64_t count = 1; - for (int i = 0; i < num_dims; ++i) { - count *= dims[i]; - } - return count; -} - -// Determines whether tensor is constant. -// TODO(b/138199592): Introduce new query which checks for constant OR -// persistent-read-only, which would be useful for most tensor kernels that -// are potentially dynamic based on the input tensor value availability at the -// time of prepare. 
-inline bool IsConstantTensor(const TfLiteTensor* tensor) { - return tensor->allocation_type == kTfLiteMmapRo; -} - -inline bool IsConstantOrPersistentTensor(const TfLiteTensor* tensor) { - return IsConstantTensor(tensor) || - (tensor->allocation_type == kTfLitePersistentRo); -} - -// Determines whether tensor is dynamic. Note that a tensor can be non-const and -// not dynamic. This function specifically checks for a dynamic tensor. -inline bool IsDynamicTensor(const TfLiteTensor* tensor) { - return tensor->allocation_type == kTfLiteDynamic; -} - -// Sets tensor to dynamic. -inline void SetTensorToDynamic(TfLiteTensor* tensor) { - if (tensor->allocation_type != kTfLiteDynamic) { - tensor->allocation_type = kTfLiteDynamic; - tensor->data.raw = nullptr; - } -} - -// Sets tensor to persistent and read-only. -inline void SetTensorToPersistentRo(TfLiteTensor* tensor) { - if (tensor->allocation_type != kTfLitePersistentRo) { - tensor->allocation_type = kTfLitePersistentRo; - tensor->data.raw = nullptr; - } -} - -// Determines whether it is a hybrid op - one that has float inputs and -// quantized weights. -inline bool IsHybridOp(const TfLiteTensor* input, const TfLiteTensor* weight) { - return ((weight->type == kTfLiteUInt8 || weight->type == kTfLiteInt8) && - input->type == kTfLiteFloat32); -} - -// Check dimensionality match and populate OpData for Conv and DepthwiseConv. 
-TfLiteStatus PopulateConvolutionQuantizationParams( - TfLiteContext* context, const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, - const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, - int32_t* output_activation_min, int32_t* output_activation_max, - int32_t* per_channel_multiplier, int32_t* per_channel_shift); - -TfLiteStatus PopulateConvolutionQuantizationParams( - TfLiteContext* context, const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, - const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, - int32_t* output_activation_min, int32_t* output_activation_max, - int32_t* per_channel_multiplier, int32_t* per_channel_shift, - int num_channels); - -// Calculates the multiplication factor for a quantized convolution (or -// quantized depthwise convolution) involving the given tensors. Returns an -// error if the scales of the tensors are not compatible. -TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, - const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, - TfLiteTensor* output, - double* multiplier); - -TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, - const TfLiteTensor* input, - const TfLiteTensor* filter, - TfLiteTensor* output, - double* multiplier); - -// Calculates the useful quantized range of an activation layer given its -// activation tensor. 
-TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context, - TfLiteFusedActivation activation, - TfLiteTensor* output, - int32_t* act_min, - int32_t* act_max); - -// Calculates the useful range of an activation layer given its activation -// tensor.a -template -void CalculateActivationRange(TfLiteFusedActivation activation, - T* activation_min, T* activation_max) { - if (activation == kTfLiteActRelu) { - *activation_min = 0; - *activation_max = std::numeric_limits::max(); - } else if (activation == kTfLiteActRelu6) { - *activation_min = 0; - *activation_max = 6; - } else if (activation == kTfLiteActReluN1To1) { - *activation_min = -1; - *activation_max = 1; - } else { - *activation_min = std::numeric_limits::lowest(); - *activation_max = std::numeric_limits::max(); - } -} - -// Return true if the given tensors have the same shape. -bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2); - -#if !defined(TF_LITE_STATIC_MEMORY) -// Gets the output shape from the input tensor. -TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context, - const TfLiteTensor* input, - TfLiteIntArray** output_shape); - -const std::string GetShapeDebugString(const TfLiteIntArray* shape); - -#endif // !defined(TF_LITE_STATIC_MEMORY) - -// Calculates the output_shape that is necessary for element-wise operations -// with broadcasting involving the two input tensors. -TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteIntArray** output_shape); - -// Calculates the output_shape that is necessary for element-wise operations -// with broadcasting involving the three input tensors. -TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - const TfLiteTensor* input3, - TfLiteIntArray** output_shape); - -// Return the size of given type in bytes. Return 0 in case of string. 
-int TfLiteTypeGetSize(TfLiteType type); - -// Whether the current platform is mobile (Android or iOS). -bool IsMobilePlatform(); - -// Returns whether there is unspecified dimension in the tensor's dim signature. -bool HasUnspecifiedDimension(const TfLiteTensor* tensor); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/op_macros.h b/code/components/tflite-lib/tensorflow/lite/kernels/op_macros.h deleted file mode 100644 index 4255d253..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/op_macros.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_OP_MACROS_H_ -#define TENSORFLOW_LITE_KERNELS_OP_MACROS_H_ - -#include "tensorflow/lite/micro/debug_log.h" - -#if !defined(TF_LITE_MCU_DEBUG_LOG) -#include -#define TFLITE_ABORT abort() -#else -inline void AbortImpl() { - DebugLog("HALTED\n"); - while (1) { - } -} -#define TFLITE_ABORT AbortImpl(); -#endif - -#if defined(NDEBUG) -#define TFLITE_ASSERT_FALSE (static_cast(0)) -#else -#define TFLITE_ASSERT_FALSE TFLITE_ABORT -#endif - -#endif // TENSORFLOW_LITE_KERNELS_OP_MACROS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/kernels/padding.h b/code/components/tflite-lib/tensorflow/lite/kernels/padding.h deleted file mode 100644 index d9cca3ea..00000000 --- a/code/components/tflite-lib/tensorflow/lite/kernels/padding.h +++ /dev/null @@ -1,115 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_PADDING_H_ -#define TENSORFLOW_LITE_KERNELS_PADDING_H_ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -inline int ComputePadding(int stride, int dilation_rate, int in_size, - int filter_size, int out_size) { - int effective_filter_size = (filter_size - 1) * dilation_rate + 1; - int padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2; - return padding > 0 ? padding : 0; -} - -// It's not guaranteed that padding is symmetric. It's important to keep -// offset for algorithms need all paddings. -inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size, - int filter_size, int out_size, - int* offset) { - int effective_filter_size = (filter_size - 1) * dilation_rate + 1; - int total_padding = - ((out_size - 1) * stride + effective_filter_size - in_size); - total_padding = total_padding > 0 ? total_padding : 0; - *offset = total_padding % 2; - return total_padding / 2; -} - -// Matching GetWindowedOutputSize in TensorFlow. 
-inline int ComputeOutSize(TfLitePadding padding, int image_size, - int filter_size, int stride, int dilation_rate = 1) { - int effective_filter_size = (filter_size - 1) * dilation_rate + 1; - - // TODO(b/186448822): This uses 0 since the function has no other way to - // report error case - if (stride == 0) return 0; - - switch (padding) { - case kTfLitePaddingSame: - return (image_size + stride - 1) / stride; - case kTfLitePaddingValid: - return (image_size + stride - effective_filter_size) / stride; - default: - return 0; - } -} - -inline TfLitePaddingValues ComputePaddingHeightWidth( - int stride_height, int stride_width, int dilation_rate_height, - int dilation_rate_width, int in_height, int in_width, int filter_height, - int filter_width, TfLitePadding padding, int* out_height, int* out_width) { - *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width, - dilation_rate_width); - *out_height = ComputeOutSize(padding, in_height, filter_height, stride_height, - dilation_rate_height); - - TfLitePaddingValues padding_values; - int offset = 0; - padding_values.height = - ComputePaddingWithOffset(stride_height, dilation_rate_height, in_height, - filter_height, *out_height, &offset); - padding_values.height_offset = offset; - padding_values.width = - ComputePaddingWithOffset(stride_width, dilation_rate_width, in_width, - filter_width, *out_width, &offset); - padding_values.width_offset = offset; - return padding_values; -} - -inline Padding3DValues ComputePadding3DValues( - int stride_height, int stride_width, int stride_depth, - int dilation_rate_height, int dilation_rate_width, int dilation_rate_depth, - int in_height, int in_width, int in_depth, int filter_height, - int filter_width, int filter_depth, TfLitePadding padding, int* out_height, - int* out_width, int* out_depth) { - *out_width = ComputeOutSize(padding, in_width, filter_width, stride_width, - dilation_rate_width); - *out_height = ComputeOutSize(padding, in_height, filter_height, 
stride_height, - dilation_rate_height); - *out_depth = ComputeOutSize(padding, in_depth, filter_depth, stride_depth, - dilation_rate_depth); - - Padding3DValues padding_values; - int offset = 0; - padding_values.depth = - ComputePaddingWithOffset(stride_depth, dilation_rate_depth, in_depth, - filter_depth, *out_depth, &offset); - padding_values.depth_offset = offset; - padding_values.height = - ComputePaddingWithOffset(stride_height, dilation_rate_height, in_height, - filter_height, *out_height, &offset); - padding_values.height_offset = offset; - padding_values.width = - ComputePaddingWithOffset(stride_width, dilation_rate_width, in_width, - filter_width, *out_width, &offset); - padding_values.width_offset = offset; - return padding_values; -} -} // namespace tflite - -#endif // TENSORFLOW_LITE_KERNELS_PADDING_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/all_ops_resolver.h b/code/components/tflite-lib/tensorflow/lite/micro/all_ops_resolver.h deleted file mode 100644 index 391b4f08..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/all_ops_resolver.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_ -#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_ - -#include "tensorflow/lite/micro/compatibility.h" -#include "tensorflow/lite/micro/micro_mutable_op_resolver.h" - -namespace tflite { - -// The magic number in the template parameter is the maximum number of ops that -// can be added to AllOpsResolver. It can be increased if needed. And most -// applications that care about the memory footprint will want to directly use -// MicroMutableOpResolver and have an application specific template parameter. -// The examples directory has sample code for this. -class AllOpsResolver : public MicroMutableOpResolver<128> { - public: - AllOpsResolver(); - - private: - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h deleted file mode 100644 index b92d6b2d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_ -#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_ - -#include -#include - -#include "tensorflow/lite/c/c_api_types.h" - -namespace tflite { -// Interface classes that the TFLM framework relies on to get buffers it needs. -// There are two types of buffers that the TFLM framework requires: persistent -// and non-persistent. Persistent buffers, once allocated, are never freed by -// the TFLM framework. Non-persist buffers can be allocated and deallocated by -// the TFLM framework. This file defines two interfaces classes that TFLM -// framework will rely on to manage these buffers. - -// Interface class for managing persistent buffers. -class IPersistentBufferAllocator { - public: - IPersistentBufferAllocator() {} - virtual ~IPersistentBufferAllocator() {} - - // Allocates persistent memory. The persistent buffer is never freed. - virtual uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) = 0; - - // Returns the size of all persistent allocations in bytes. - virtual size_t GetPersistentUsedBytes() const = 0; -}; - -// Interface class for managing non-persistent buffers. -// The default non-persistent buffers are temp buffers that are not resizable. -// Support of at least one resizable buffer is required. -class INonPersistentBufferAllocator { - public: - INonPersistentBufferAllocator() {} - virtual ~INonPersistentBufferAllocator() {} - - // Allocates a temporary buffer. This buffer is not resizable. - virtual uint8_t* AllocateTemp(size_t size, size_t alignment) = 0; - - // Signals that a temporary buffer is no longer needed. - virtual void DeallocateTemp(uint8_t* buf) = 0; - - // Returns true if all temporary buffers are already deallocated. - virtual bool IsAllTempDeallocated() = 0; - - // Signals that all temporary allocations can be reclaimed. 
TFLM calls this - // API when it knows that all temporary buffers that it requested has been - // deallocated. The goal of API is to facilitate implementations of - // INonPersistentBufferAllocator can reuse buffer with some reasonable - // complexity. - virtual TfLiteStatus ResetTempAllocations() = 0; - - // Returns a buffer that is resizable viable ResizeBuffer(). - virtual uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) = 0; - - // Resizes a buffer that is previously returned by the - // AllocateResizableBuffer. - virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size, - size_t alignment) = 0; - - // Frees up the memory occupied by the resizable buffer. - virtual TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) = 0; - - // Returns a pointer pointing to the start of the overlay memory, which is - // used for activation tensors and scratch buffers by kernels at Invoke stage. - virtual uint8_t* GetOverlayMemoryAddress() const = 0; - - // Reserves the size of the overlay memory. This overlay is reserved for the - // kernels at Invoke stage. This is referred to as the overlay because before - // Invoket state, the same memory can be used for temp buffers. The layout of - // the memory is planned by the memory planner separately at Invoke stage. - virtual TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size, - size_t alignment) = 0; - - // Returns the size of non-persistent buffer in use. - virtual size_t GetNonPersistentUsedBytes() const = 0; - - // Returns the number of bytes available with a given alignment. This number - // takes in account any temporary allocations. 
- virtual size_t GetAvailableMemory(size_t alignment) const = 0; -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc deleted file mode 100644 index 6389da40..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h" - -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -NonPersistentArenaBufferAllocator::NonPersistentArenaBufferAllocator( - uint8_t* buffer, size_t buffer_size) - : buffer_head_(buffer), - buffer_tail_(buffer + buffer_size), - head_temp_(buffer), - next_temp_(buffer) {} - -NonPersistentArenaBufferAllocator::~NonPersistentArenaBufferAllocator() {} - -// Allocates a temporary buffer. This buffer is not resizable. 
-uint8_t* NonPersistentArenaBufferAllocator::AllocateTemp(size_t size, - size_t alignment) { - uint8_t* const aligned_result = AlignPointerUp(next_temp_, alignment); - const size_t available_memory = buffer_tail_ - aligned_result; - if (available_memory < size) { - MicroPrintf( - "Failed to allocate temp memory. Requested: %u, " - "available %u, missing: %u", - size, available_memory, size - available_memory); - return nullptr; - } - next_temp_ = aligned_result + size; - temp_buffer_ptr_check_sum_ ^= reinterpret_cast(aligned_result); - temp_buffer_count_++; - return aligned_result; -} - -// Signals that a temporary buffer is no longer needed. -void NonPersistentArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) { - temp_buffer_ptr_check_sum_ ^= reinterpret_cast(temp_buf); - temp_buffer_count_--; -} - -// Returns true if all temporary buffers are already deallocated. -bool NonPersistentArenaBufferAllocator::IsAllTempDeallocated() { - if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) { - MicroPrintf( - "Number of allocated temp buffers: %d. Checksum passing status: %d", - temp_buffer_count_, !temp_buffer_ptr_check_sum_); - return false; - } - return true; -} - -// Signals that all temporary allocations can be reclaimed. TFLM calls this -// API when it knows that all temporary buffers that it requested has been -// deallocated. The goal of API is to facilitate implementations of -// INonPersistentBufferAllocator can reuse buffer with some reasonable -// complexity. -TfLiteStatus NonPersistentArenaBufferAllocator::ResetTempAllocations() { - if (!IsAllTempDeallocated()) { - MicroPrintf( - "All temp buffers must be freed before calling ResetTempAllocations()"); - return kTfLiteError; - } - next_temp_ = head_temp_; - return kTfLiteOk; -} - -// Returns a buffer that is resizable viable ResizeBuffer(). 
-uint8_t* NonPersistentArenaBufferAllocator::AllocateResizableBuffer( - size_t size, size_t alignment) { - // Only supports one resizable buffer, which starts at the buffer head. - uint8_t* expected_resizable_buf = AlignPointerUp(buffer_head_, alignment); - - if (resizable_buffer_allocated_) { - MicroPrintf( - "Cannot allocate a new resizable buffer when one is already allocated"); - return nullptr; - } - - if (ResizeBuffer(expected_resizable_buf, size, alignment) == kTfLiteOk) { - resizable_buffer_allocated_ = true; - return expected_resizable_buf; - } - return nullptr; -} - -// Resizes a buffer that is previously returned by the AllocateResizableBuffer. -// Note that ResizeBuffer(old_resizable_buf, 0, 1) effectively deallocates -// a previous allocated resizable buffer. -TfLiteStatus NonPersistentArenaBufferAllocator::ResizeBuffer( - uint8_t* resizable_buf, size_t size, size_t alignment) { - // Only supports one resizable buffer, which starts at the buffer head. - uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); - if (resizable_buf != expect_resizable_buf) { - MicroPrintf("Internal error: buffer is not resizable"); - return kTfLiteError; - } - if (head_temp_ != next_temp_) { - MicroPrintf("ResetTempAllocations() is not called before ResizeBuffer()."); - return kTfLiteError; - } - - const size_t available_memory = buffer_tail_ - expect_resizable_buf; - if (available_memory < size) { - MicroPrintf( - "Failed to resize buffer. Requested: %u, available %u, missing: %u", - size, available_memory, size - available_memory); - return kTfLiteError; - } - head_temp_ = expect_resizable_buf + size; - next_temp_ = head_temp_; - - return kTfLiteOk; -} - -// Frees up the memory occupied by the resizable buffer. 
-TfLiteStatus NonPersistentArenaBufferAllocator::DeallocateResizableBuffer( - uint8_t* resizable_buf) { - TfLiteStatus status = ResizeBuffer(resizable_buf, 0, 1); - if (status == kTfLiteOk) { - resizable_buffer_allocated_ = false; - } - return status; -} - -// Returns a pointer pointing to the start of the overlay memory, which is -// used for activation tensors and scratch buffers by kernels at Invoke stage. -uint8_t* NonPersistentArenaBufferAllocator::GetOverlayMemoryAddress() const { - return buffer_head_; -} - -// Reserves the size of the overlay memory. This overlay is reserved for the -// kernels at Invoke stage. This is referred to as the overlay because before -// Invoket state, the same memory can be used for temp buffers. The layout of -// the memory is planned by the memory planner separately at Invoke stage. -TfLiteStatus -NonPersistentArenaBufferAllocator::ReserveNonPersistentOverlayMemory( - size_t size, size_t alignment) { - uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); - return ResizeBuffer(expect_resizable_buf, size, alignment); -} - -// Returns the size of non-persistent buffer in use. -size_t NonPersistentArenaBufferAllocator::GetNonPersistentUsedBytes() const { - return (next_temp_ - buffer_head_); -} - -// Returns the number of bytes available with a given alignment. This number -// takes in account any temporary allocations. 
-size_t NonPersistentArenaBufferAllocator::GetAvailableMemory( - size_t alignment) const { - uint8_t* const aligned_temp = AlignPointerUp(next_temp_, alignment); - uint8_t* const aligned_tail = AlignPointerDown(buffer_tail_, alignment); - return aligned_tail - aligned_temp; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h deleted file mode 100644 index 9eb4efeb..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ -#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ - -#include -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h" -#include "tensorflow/lite/micro/compatibility.h" - -namespace tflite { - -// Implement INonPersistentBufferAllocator on an arena that is dedicated for -// non-persistent buffers. 
-class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator { - public: - NonPersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size); - virtual ~NonPersistentArenaBufferAllocator(); - - // Allocates a temporary buffer. This buffer is not resizable. - uint8_t* AllocateTemp(size_t size, size_t alignment) override; - - // Signals that a temporary buffer is no longer needed. - void DeallocateTemp(uint8_t* buf) override; - - // Returns true if all temporary buffers are already deallocated. - bool IsAllTempDeallocated() override; - - // Signals that all temporary allocations can be reclaimed. TFLM calls this - // API when it knows that all temporary buffers that it requested has been - // deallocated. - TfLiteStatus ResetTempAllocations() override; - - // Returns a buffer that is resizable viable ResizeBuffer(). - uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) override; - - // Resizes a buffer that is previously returned by the - // AllocateResizableBuffer. - TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size, - size_t alignment) override; - - // Frees up the memory occupied by the resizable buffer. - TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) override; - - // Returns a pointer pointing to the start of the overlay memory, which is - // used for activation tensors and scratch buffers by kernels at Invoke stage. - uint8_t* GetOverlayMemoryAddress() const override; - - // Reserves the size of the overlay memory. This overlay is reserved for the - // kernels at Invoke stage. This is referred to as the overlay because before - // Invoket state, the same memory can be used for temp buffers. The layout of - // the memory is planned by the memory planner separately at Invoke stage. - TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size, - size_t alignment) override; - - // Returns the size of non-persistent buffer in use. 
- size_t GetNonPersistentUsedBytes() const override; - - // Returns the number of bytes available with a given alignment. This number - // takes in account any temporary allocations. - size_t GetAvailableMemory(size_t alignment) const override; - - TF_LITE_REMOVE_VIRTUAL_DELETE - - private: - // The memory arena that this allocator manages. - uint8_t* const buffer_head_; - uint8_t* const buffer_tail_; - - // The whole region is split into two parts: - // buffer_head_ to head_temp_ - 1 belongs to the only resizable buffer. - // head_temp_ to buffer_tail_ can be used for (non-resizable) temp buffers. - uint8_t* head_temp_; - - // next_temp_ points to the next available temp buffer allocation address and - // its range is between head_temp_ and buffer_tail_ - uint8_t* next_temp_; - - // XOR Check sum for outstanding temp buffers. - // If all temp buffers are deallocated OR no temp buffers are allocated, - // temp_buffer_ptr_check_sum_ == nullptr. - intptr_t temp_buffer_ptr_check_sum_ = 0; - // Count of outstanding temp buffers. - int temp_buffer_count_ = 0; - bool resizable_buffer_allocated_ = false; -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.cc b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.cc deleted file mode 100644 index 0ccc8fb1..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.cc +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h" - -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -PersistentArenaBufferAllocator::PersistentArenaBufferAllocator( - uint8_t* buffer, size_t buffer_size) - : buffer_head_(buffer), - buffer_tail_(buffer + buffer_size), - tail_temp_(buffer_tail_) {} - -PersistentArenaBufferAllocator::~PersistentArenaBufferAllocator() {} - -uint8_t* PersistentArenaBufferAllocator::AllocatePersistentBuffer( - size_t size, size_t alignment) { - uint8_t* const aligned_result = - AlignPointerDown(tail_temp_ - size, alignment); - if (aligned_result < buffer_head_) { -#ifndef TF_LITE_STRIP_ERROR_STRINGS - const size_t missing_memory = buffer_head_ - aligned_result; - MicroPrintf( - "Failed to allocate tail memory. 
Requested: %u, " - "available %u, missing: %u", - size, size - missing_memory, missing_memory); -#endif - return nullptr; - } - tail_temp_ = aligned_result; - return aligned_result; -} - -size_t PersistentArenaBufferAllocator::GetPersistentUsedBytes() const { - return buffer_tail_ - tail_temp_; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h deleted file mode 100644 index 70de408f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ -#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ - -#include -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h" -#include "tensorflow/lite/micro/compatibility.h" - -namespace tflite { - -// PersistentArenaBufferAllocator is an implementatation of -// IPersistentBufferAllocator interface on an arena that is dedicated for -// persistent buffers. 
-class PersistentArenaBufferAllocator : public IPersistentBufferAllocator { - public: - PersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size); - virtual ~PersistentArenaBufferAllocator(); - - // Allocates persistent memory. The persistent buffer is never freed. - // Returns nullptr if errors occured. - uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override; - - // Returns the size of all persistent allocations in bytes. - size_t GetPersistentUsedBytes() const override; - - TF_LITE_REMOVE_VIRTUAL_DELETE - private: - // The memory arena that this allocator manages. - uint8_t* const buffer_head_; - uint8_t* const buffer_tail_; - - // The whole region is split into two parts: - // tail_temp_ to buffer_tail_ contains allocated buffers; - // buffer_head_ to tail_temp_ - 1 belongs to still available spaces. - // So in essence, the allocated region grows from the bottom and emulates - // SingleArenaBufferAllocator's persistent part. - uint8_t* tail_temp_; -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.cc b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.cc deleted file mode 100644 index 0f24a0b5..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.cc +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h" - -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" - -namespace tflite { - -RecordingSingleArenaBufferAllocator::RecordingSingleArenaBufferAllocator( - ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) - : SingleArenaBufferAllocator(error_reporter, buffer_head, buffer_size), - requested_head_bytes_(0), - requested_tail_bytes_(0), - used_bytes_(0), - alloc_count_(0) {} - -RecordingSingleArenaBufferAllocator::~RecordingSingleArenaBufferAllocator() {} - -RecordingSingleArenaBufferAllocator* -RecordingSingleArenaBufferAllocator::Create(ErrorReporter* error_reporter, - uint8_t* buffer_head, - size_t buffer_size) { - TFLITE_DCHECK(error_reporter != nullptr); - TFLITE_DCHECK(buffer_head != nullptr); - RecordingSingleArenaBufferAllocator tmp = RecordingSingleArenaBufferAllocator( - error_reporter, buffer_head, buffer_size); - - uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer( - sizeof(RecordingSingleArenaBufferAllocator), - alignof(RecordingSingleArenaBufferAllocator)); - // Use the default copy constructor to populate internal states. 
- return new (allocator_buffer) RecordingSingleArenaBufferAllocator(tmp); -} - -size_t RecordingSingleArenaBufferAllocator::GetRequestedBytes() const { - return requested_head_bytes_ + requested_tail_bytes_; -} - -size_t RecordingSingleArenaBufferAllocator::GetUsedBytes() const { - return used_bytes_; -} - -size_t RecordingSingleArenaBufferAllocator::GetAllocatedCount() const { - return alloc_count_; -} - -TfLiteStatus RecordingSingleArenaBufferAllocator::ResizeBuffer( - uint8_t* resizable_buf, size_t size, size_t alignment) { - const uint8_t* previous_head = head(); - TfLiteStatus status = - SingleArenaBufferAllocator::ResizeBuffer(resizable_buf, size, alignment); - if (status == kTfLiteOk) { - used_bytes_ += head() - previous_head; - requested_head_bytes_ = size; - } - return status; -} - -uint8_t* RecordingSingleArenaBufferAllocator::AllocatePersistentBuffer( - size_t size, size_t alignment) { - const uint8_t* previous_tail = tail(); - uint8_t* result = - SingleArenaBufferAllocator::AllocatePersistentBuffer(size, alignment); - if (result != nullptr) { - used_bytes_ += previous_tail - tail(); - requested_tail_bytes_ += size; - alloc_count_++; - } - return result; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h deleted file mode 100644 index 3cec561e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ -#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ - -#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h" -#include "tensorflow/lite/micro/compatibility.h" - -namespace tflite { - -// Utility class used to log allocations of a SingleArenaBufferAllocator. Should -// only be used in debug/evaluation settings or unit tests to evaluate -// allocation usage. -class RecordingSingleArenaBufferAllocator : public SingleArenaBufferAllocator { - public: - RecordingSingleArenaBufferAllocator(ErrorReporter* error_reporter, - uint8_t* buffer_head, size_t buffer_size); - // TODO(b/157615197): Cleanup constructors/destructor and use factory - // functions. - ~RecordingSingleArenaBufferAllocator() override; - - static RecordingSingleArenaBufferAllocator* Create( - ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size); - - // Returns the number of bytes requested from the head or tail. - size_t GetRequestedBytes() const; - - // Returns the number of bytes actually allocated from the head or tail. This - // value will be >= to the number of requested bytes due to padding and - // alignment. - size_t GetUsedBytes() const; - - // Returns the number of alloc calls from the head or tail. 
- size_t GetAllocatedCount() const; - - TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size, - size_t alignment) override; - uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override; - - private: - size_t requested_head_bytes_; - size_t requested_tail_bytes_; - size_t used_bytes_; - size_t alloc_count_; - - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.cc b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.cc deleted file mode 100644 index 15d512bd..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.cc +++ /dev/null @@ -1,209 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h" - -#include -#include -#include - -#include "tensorflow/lite/c/c_api_types.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -SingleArenaBufferAllocator::SingleArenaBufferAllocator( - ErrorReporter* error_reporter, uint8_t* buffer_head, uint8_t* buffer_tail) - : -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - error_reporter_(error_reporter), -#endif - buffer_head_(buffer_head), - buffer_tail_(buffer_tail), - head_(buffer_head), - tail_(buffer_tail), - temp_(buffer_head_) { -} - -SingleArenaBufferAllocator::SingleArenaBufferAllocator( - ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size) - : SingleArenaBufferAllocator(error_reporter, buffer, buffer + buffer_size) { -} - -/* static */ -SingleArenaBufferAllocator* SingleArenaBufferAllocator::Create( - ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) { - TFLITE_DCHECK(error_reporter != nullptr); - TFLITE_DCHECK(buffer_head != nullptr); - SingleArenaBufferAllocator tmp = - SingleArenaBufferAllocator(error_reporter, buffer_head, buffer_size); - - // Allocate enough bytes from the buffer to create a - // SingleArenaBufferAllocator. The new instance will use the current adjusted - // tail buffer from the tmp allocator instance. - uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer( - sizeof(SingleArenaBufferAllocator), alignof(SingleArenaBufferAllocator)); - // Use the default copy constructor to populate internal states. 
- return new (allocator_buffer) SingleArenaBufferAllocator(tmp); -} - -SingleArenaBufferAllocator::~SingleArenaBufferAllocator() {} - -uint8_t* SingleArenaBufferAllocator::AllocateResizableBuffer(size_t size, - size_t alignment) { - // Only supports one resizable buffer, which starts at the buffer head. - uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); - if (ResizeBuffer(expect_resizable_buf, size, alignment) == kTfLiteOk) { - return expect_resizable_buf; - } - return nullptr; -} - -TfLiteStatus SingleArenaBufferAllocator::DeallocateResizableBuffer( - uint8_t* resizable_buf) { - return ResizeBuffer(resizable_buf, 0, 1); -} - -TfLiteStatus SingleArenaBufferAllocator::ReserveNonPersistentOverlayMemory( - size_t size, size_t alignment) { - uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); - return ResizeBuffer(expect_resizable_buf, size, alignment); -} - -TfLiteStatus SingleArenaBufferAllocator::ResizeBuffer(uint8_t* resizable_buf, - size_t size, - size_t alignment) { - // Only supports one resizable buffer, which starts at the buffer head. - uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment); - if (head_ != temp_ || resizable_buf != expect_resizable_buf) { - TF_LITE_REPORT_ERROR( - error_reporter_, - "Internal error: either buffer is not resizable or " - "ResetTempAllocations() is not called before ResizeBuffer()."); - return kTfLiteError; - } - - uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment); - const size_t available_memory = tail_ - aligned_result; - if (available_memory < size) { - TF_LITE_REPORT_ERROR( - error_reporter_, - "Failed to resize buffer. 
Requested: %u, available %u, missing: %u", - size, available_memory, size - available_memory); - return kTfLiteError; - } - head_ = aligned_result + size; - temp_ = head_; - - return kTfLiteOk; -} - -uint8_t* SingleArenaBufferAllocator::AllocatePersistentBuffer( - size_t size, size_t alignment) { - uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment); - if (aligned_result < head_) { -#ifndef TF_LITE_STRIP_ERROR_STRINGS - const size_t missing_memory = head_ - aligned_result; - TF_LITE_REPORT_ERROR(error_reporter_, - "Failed to allocate tail memory. Requested: %u, " - "available %u, missing: %u", - size, size - missing_memory, missing_memory); -#endif - return nullptr; - } - tail_ = aligned_result; - return aligned_result; -} - -uint8_t* SingleArenaBufferAllocator::AllocateTemp(size_t size, - size_t alignment) { - uint8_t* const aligned_result = AlignPointerUp(temp_, alignment); - const size_t available_memory = tail_ - aligned_result; - if (available_memory < size) { - TF_LITE_REPORT_ERROR(error_reporter_, - "Failed to allocate temp memory. Requested: %u, " - "available %u, missing: %u", - size, available_memory, size - available_memory); - return nullptr; - } - temp_ = aligned_result + size; - temp_buffer_ptr_check_sum_ ^= (reinterpret_cast(aligned_result)); - temp_buffer_count_++; - return aligned_result; -} - -void SingleArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) { - temp_buffer_ptr_check_sum_ ^= (reinterpret_cast(temp_buf)); - temp_buffer_count_--; -} - -bool SingleArenaBufferAllocator::IsAllTempDeallocated() { - if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) { - MicroPrintf( - "Number of allocated temp buffers: %d. 
Checksum passing status: %d", - temp_buffer_count_, !temp_buffer_ptr_check_sum_); - return false; - } - return true; -} - -TfLiteStatus SingleArenaBufferAllocator::ResetTempAllocations() { - // TODO(b/209453859): enable error check based on IsAllTempDeallocated after - // all AllocateTemp have been paird with DeallocateTemp - if (!IsAllTempDeallocated()) { - MicroPrintf( - "All temp buffers must be freed before calling ResetTempAllocations()"); - return kTfLiteError; - } - temp_ = head_; - return kTfLiteOk; -} - -uint8_t* SingleArenaBufferAllocator::GetOverlayMemoryAddress() const { - return buffer_head_; -} - -size_t SingleArenaBufferAllocator::GetNonPersistentUsedBytes() const { - return std::max(head_ - buffer_head_, temp_ - buffer_head_); -} - -size_t SingleArenaBufferAllocator::GetPersistentUsedBytes() const { - return buffer_tail_ - tail_; -} - -size_t SingleArenaBufferAllocator::GetAvailableMemory(size_t alignment) const { - uint8_t* const aligned_temp = AlignPointerUp(temp_, alignment); - uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment); - return aligned_tail - aligned_temp; -} - -size_t SingleArenaBufferAllocator::GetUsedBytes() const { - return GetPersistentUsedBytes() + GetNonPersistentUsedBytes(); -} - -size_t SingleArenaBufferAllocator::GetBufferSize() const { - return buffer_tail_ - buffer_head_; -} - -uint8_t* SingleArenaBufferAllocator::head() const { return head_; } - -uint8_t* SingleArenaBufferAllocator::tail() const { return tail_; } - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h b/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h deleted file mode 100644 index d3be1f23..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h +++ /dev/null @@ -1,152 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ -#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ - -#include -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h" -#include "tensorflow/lite/micro/compatibility.h" - -namespace tflite { - -// TODO(petewarden): This allocator never frees up or reuses any memory, even -// though we have enough information about lifetimes of the tensors to do so. -// This makes it pretty wasteful, so we should use a more intelligent method. -class SingleArenaBufferAllocator : public INonPersistentBufferAllocator, - public IPersistentBufferAllocator { - public: - // TODO(b/157615197): Cleanup constructors/destructor and use factory - // functions. - SingleArenaBufferAllocator(ErrorReporter* error_reporter, - uint8_t* buffer_head, uint8_t* buffer_tail); - SingleArenaBufferAllocator(ErrorReporter* error_reporter, uint8_t* buffer, - size_t buffer_size); - virtual ~SingleArenaBufferAllocator(); - - // Creates a new SingleArenaBufferAllocator from a given buffer head and size. 
- static SingleArenaBufferAllocator* Create(ErrorReporter* error_reporter, - uint8_t* buffer_head, - size_t buffer_size); - - // Resizes a buffer that is previously returned by the - // AllocateResizableBuffer. In current implementation, it Adjusts the head - // (lowest address and moving upwards) memory allocation to a given size. - // Calls to this method will also invalidate all temporary allocation values - // (it sets the location of temp space at the end of the head section). This - // call will fail if a chain of allocations through AllocateTemp() have not - // been cleaned up with a call to ResetTempAllocations(). - virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size, - size_t alignment) override; - - // Returns a buffer that is resizable viable ResizeBuffer(). Only one - // resizable buffer is currently supported. - virtual uint8_t* AllocateResizableBuffer(size_t size, - size_t alignment) override; - - // Frees up the memory occupied by the resizable buffer - virtual TfLiteStatus DeallocateResizableBuffer( - uint8_t* resizable_buf) override; - - // Reserves the non-persistent memory that is planned by the memory planner. - virtual TfLiteStatus ReserveNonPersistentOverlayMemory( - size_t size, size_t alignment) override; - - // Allocates persistent memory starting at the tail of the arena (highest - // address and moving downwards). - virtual uint8_t* AllocatePersistentBuffer(size_t size, - size_t alignment) override; - - // Allocates a temporary buffer from the head of the arena (lowest address and - // moving upwards) but does not update the actual head allocation size or - // position. The returned buffer is guaranteed until either - // ResetTempAllocations() is called or another call to AllocateFromHead(). - // Repeat calls to this function will create a chain of temp allocations. All - // calls to AllocateTemp() must end with a call to ResetTempAllocations(). 
If - // AllocateFromHead() is called before a call to ResetTempAllocations(), it - // will fail with an error message. - virtual uint8_t* AllocateTemp(size_t size, size_t alignment) override; - - // Signals that a temporary buffer is no longer needed. This is currently for - // book-keeping purpose and the memory region are not immediately available - // for re-use. The deallocated memory region are only reclaimed after - // ResetTempAllocations is called as it is right now. - virtual void DeallocateTemp(uint8_t* buf) override; - - // Returns true if all temporary buffers are already deallocated. - virtual bool IsAllTempDeallocated() override; - - // Resets a chain of temporary allocations back to the current head of the - // arena (lowest address). - virtual TfLiteStatus ResetTempAllocations() override; - - // Returns a pointer to the buffer currently assigned to the head section. - // This buffer is set by calling SetHeadSize(). - uint8_t* GetOverlayMemoryAddress() const override; - - // Returns the size of the head section in bytes. - size_t GetNonPersistentUsedBytes() const override; - - // Returns the size of all allocations in the tail section in bytes. - size_t GetPersistentUsedBytes() const override; - - // Returns the number of bytes available with a given alignment. This number - // takes in account any temporary allocations. - size_t GetAvailableMemory(size_t alignment) const override; - - // Returns the number of used bytes in the allocator. This number takes in - // account any temporary allocations. - size_t GetUsedBytes() const; - - TF_LITE_REMOVE_VIRTUAL_DELETE - - protected: - // Returns a pointer to the current end of the head buffer. - uint8_t* head() const; - - // Returns a pointer to the current end of the tail buffer. 
- uint8_t* tail() const; - - private: - size_t GetBufferSize() const; - -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - ErrorReporter* error_reporter_; -#endif - uint8_t* buffer_head_; - uint8_t* buffer_tail_; - uint8_t* head_; - uint8_t* tail_; - uint8_t* temp_; - - // The combination of the checksum of outstanding temporary buffer pointers - // AND the count of outstanding temporary buffer provide a low cost mechanism - // to audit temporary buffers' allocation and deallocation. - // - // XOR Check sum for outstanding temp buffers. - // If all temp buffers are deallocated OR no temp buffers are allocated, - // temp_buffer_ptr_check_sum_ == nullptr. - intptr_t temp_buffer_ptr_check_sum_ = 0; - // Count of outstanding temp buffers. - int temp_buffer_count_ = 0; -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/compatibility.h b/code/components/tflite-lib/tensorflow/lite/micro/compatibility.h deleted file mode 100644 index 49acb28f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/compatibility.h +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_ -#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_ - -// C++ will automatically create class-specific delete operators for virtual -// objects, which by default call the global delete function. For embedded -// applications we want to avoid this, and won't be calling new/delete on these -// objects, so we need to override the default implementation with one that does -// nothing to avoid linking in ::delete(). -// This macro needs to be included in all subclasses of a virtual base class in -// the private section. -#ifdef TF_LITE_STATIC_MEMORY -#define TF_LITE_REMOVE_VIRTUAL_DELETE \ - void operator delete(void* p) {} -#else -#define TF_LITE_REMOVE_VIRTUAL_DELETE -#endif - -#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/debug_log.cc b/code/components/tflite-lib/tensorflow/lite/micro/debug_log.cc deleted file mode 100644 index 46ca253a..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/debug_log.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Reference implementation of the DebugLog() function that's required for a -// platform to support the TensorFlow Lite for Microcontrollers library. 
This is -// the only function that's absolutely required to be available on a target -// device, since it's used for communicating test results back to the host so -// that we can verify the implementation is working correctly. -// It's designed to be as easy as possible to supply an implementation though. -// On platforms that have a POSIX stack or C library, it can be written as a -// single call to `fprintf(stderr, "%s", s)` to output a string to the error -// stream of the console, but if there's no OS or C library available, there's -// almost always an equivalent way to write out a string to some serial -// interface that can be used instead. For example on Arm M-series MCUs, calling -// the `bkpt #0xAB` assembler instruction will output the string in r1 to -// whatever debug serial connection is available. If you're running mbed, you -// can do the same by creating `Serial pc(USBTX, USBRX)` and then calling -// `pc.printf("%s", s)`. -// To add an equivalent function for your own platform, create your own -// implementation file, and place it in a subfolder with named after the OS -// you're targeting. For example, see the Cortex M bare metal version in -// tensorflow/lite/micro/bluepill/debug_log.cc or the mbed one on -// tensorflow/lite/micro/mbed/debug_log.cc. - -#include "tensorflow/lite/micro/debug_log.h" - -#ifndef TF_LITE_STRIP_ERROR_STRINGS -#include -#endif - -extern "C" void DebugLog(const char* s) { -#ifndef TF_LITE_STRIP_ERROR_STRINGS - // Reusing TF_LITE_STRIP_ERROR_STRINGS to disable DebugLog completely to get - // maximum reduction in binary size. This is because we have DebugLog calls - // via TF_LITE_CHECK that are not stubbed out by TF_LITE_REPORT_ERROR. 
- fprintf(stderr, "%s", s); -#endif -} diff --git a/code/components/tflite-lib/tensorflow/lite/micro/debug_log.h b/code/components/tflite-lib/tensorflow/lite/micro/debug_log.h deleted file mode 100644 index c2840d0f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/debug_log.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_ -#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_ - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// This function should be implemented by each target platform, and provide a -// way for strings to be output to some text stream. For more information, see -// tensorflow/lite/micro/debug_log.cc. -void DebugLog(const char* s); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/fake_micro_context.cc b/code/components/tflite-lib/tensorflow/lite/micro/fake_micro_context.cc deleted file mode 100644 index 2403c6b1..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/fake_micro_context.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/fake_micro_context.h" - -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h" -#include "tensorflow/lite/micro/micro_allocator.h" -#include "tensorflow/lite/micro/micro_arena_constants.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { -namespace { -// Dummy static variables to allow creation of dummy MicroAllocator. -// All tests are guarateed to run serially. 
-static constexpr int KDummyTensorArenaSize = 256; -static uint8_t dummy_tensor_arena[KDummyTensorArenaSize]; -} // namespace - -FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors, - SingleArenaBufferAllocator* allocator, - MicroGraph* micro_graph) - : MicroContext( - MicroAllocator::Create(dummy_tensor_arena, KDummyTensorArenaSize, - GetMicroErrorReporter()), - nullptr, micro_graph), - tensors_(tensors), - allocator_(allocator) {} - -TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) { - allocated_tensor_count_++; - return &tensors_[tensor_index]; -} - -void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { - allocated_tensor_count_--; -} - -bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() { - return !allocated_tensor_count_; -} - -TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) { - TfLiteEvalTensor* eval_tensor = - reinterpret_cast(allocator_->AllocateTemp( - sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor))); - TFLITE_DCHECK(eval_tensor != nullptr); - - // In unit tests, the TfLiteTensor pointer contains the source of truth for - // buffers and values: - eval_tensor->data = tensors_[tensor_index].data; - eval_tensor->dims = tensors_[tensor_index].dims; - eval_tensor->type = tensors_[tensor_index].type; - return eval_tensor; -} - -void* FakeMicroContext::AllocatePersistentBuffer(size_t bytes) { - // FakeMicroContext use SingleArenaBufferAllocator, which does not - // automatically apply the buffer alignment like MicroAllocator. The buffer - // alignment is potentially wasteful but allows the fake_micro_context to work - // correctly with optimized kernels. 
- return allocator_->AllocatePersistentBuffer(bytes, - MicroArenaBufferAlignment()); -} - -TfLiteStatus FakeMicroContext::RequestScratchBufferInArena(size_t bytes, - int* buffer_index) { - TFLITE_DCHECK(buffer_index != nullptr); - - if (scratch_buffer_count_ == kNumScratchBuffers_) { - MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).", - kNumScratchBuffers_); - return kTfLiteError; - } - - // For tests, we allocate scratch buffers from the tail and keep them around - // for the lifetime of model. This means that the arena size in the tests will - // be more than what we would have if the scratch buffers could share memory. - scratch_buffers_[scratch_buffer_count_] = - allocator_->AllocatePersistentBuffer(bytes, MicroArenaBufferAlignment()); - TFLITE_DCHECK(scratch_buffers_[scratch_buffer_count_] != nullptr); - - *buffer_index = scratch_buffer_count_++; - return kTfLiteOk; -} - -void* FakeMicroContext::GetScratchBuffer(int buffer_index) { - TFLITE_DCHECK(scratch_buffer_count_ <= kNumScratchBuffers_); - if (buffer_index >= scratch_buffer_count_) { - return nullptr; - } - return scratch_buffers_[buffer_index]; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/fake_micro_context.h b/code/components/tflite-lib/tensorflow/lite/micro/fake_micro_context.h deleted file mode 100644 index 31b39d38..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/fake_micro_context.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_ -#define TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_ - -#include "tensorflow/lite/micro/micro_context.h" -#include "tensorflow/lite/micro/micro_graph.h" - -namespace tflite { -// A fake of MicroContext for kernel util tests. -class FakeMicroContext : public MicroContext { - public: - FakeMicroContext(TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator, - MicroGraph* micro_graph); - - void* AllocatePersistentBuffer(size_t bytes) override; - TfLiteStatus RequestScratchBufferInArena(size_t bytes, - int* buffer_index) override; - void* GetScratchBuffer(int buffer_index) override; - - TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) override; - void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override; - bool IsAllTempTfLiteTensorDeallocated(); - - TfLiteEvalTensor* GetEvalTensor(int tensor_index) override; - - private: - static constexpr int kNumScratchBuffers_ = 12; - - int scratch_buffer_count_ = 0; - uint8_t* scratch_buffers_[kNumScratchBuffers_]; - - TfLiteTensor* tensors_; - int allocated_tensor_count_ = 0; - - SingleArenaBufferAllocator* allocator_; - - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/flatbuffer_utils.cc b/code/components/tflite-lib/tensorflow/lite/micro/flatbuffer_utils.cc deleted file mode 100644 index 9996172b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/flatbuffer_utils.cc +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/flatbuffer_utils.h" - -namespace tflite { - -FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size) - : flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {} - -int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const { - const uint8_t* elem = data_ + i * byte_width_; - return ::flexbuffers::ReadInt64(elem, byte_width_); -} - -uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const { - const uint8_t* elem = data_ + i * byte_width_; - return ::flexbuffers::ReadUInt64(elem, byte_width_); -} - -int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const { - return static_cast(ElementAsInt64(i)); -} - -bool FlexbufferWrapper::ElementAsBool(size_t i) const { - return static_cast(ElementAsUInt64(i)); -} - -double FlexbufferWrapper::ElementAsDouble(size_t i) const { - const uint8_t* elem = data_ + i * byte_width_; - return ::flexbuffers::ReadDouble(elem, byte_width_); -} - -float FlexbufferWrapper::ElementAsFloat(size_t i) const { - return static_cast(FlexbufferWrapper::ElementAsDouble(i)); -} - -// TODO(b/192589496): Ops must always be there. Remove this function when fixed -uint32_t NumSubgraphOperators(const SubGraph* subgraph) { - if (subgraph->operators() != nullptr) { - return subgraph->operators()->size(); - } else { - return 0; - } -} -// TODO(b/192589496): Ops must always be there. 
Remove this function when fixed -uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) { - const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx); - return NumSubgraphOperators(subgraph); -} - -TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray( - const flatbuffers::Vector* flatbuffer_array) { - // On little-endian machines, TfLiteIntArray happens to have the same memory - // layout as flatbuffers:Vector, so we can reinterpret_cast the - // flatbuffer vector and avoid a copy and malloc. - // TODO(b/188459715): audit this usage of const_cast. - return const_cast( - reinterpret_cast(flatbuffer_array)); -} - -TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray( - const flatbuffers::Vector* flatbuffer_array) { - // On little-endian machines, TfLiteFloatArray happens to have the same memory - // layout as flatbuffers:Vector, so we can reinterpret_cast the - // flatbuffer vector and avoid a copy and malloc. - // TODO(b/188459715): audit this usage of const_cast. - return const_cast( - reinterpret_cast(flatbuffer_array)); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/flatbuffer_utils.h b/code/components/tflite-lib/tensorflow/lite/micro/flatbuffer_utils.h deleted file mode 100644 index b4e0cdc2..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/flatbuffer_utils.h +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_ -#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_ - -#include "flatbuffers/flatbuffers.h" -#include "flatbuffers/flexbuffers.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { -// Kernels use flexbuffers::Map to pack their init parameters in a tflite file, -// with the parameter names as map keys and the parameter values as the -// corresponding map values. -// Accessing the map values using the flexbuffers:Map class is inline heavy, -// which can cause the code size to bloat beyond what's reasonable for a micro -// application. Use this class instead, when possible. -// FlexbufferWrapper takes advantage of the following properties of -// flexbuffers::Map: -// 1. It can be viewed as a flexbuffers::Vector of the values. -// 2. The values in the vector are ordered alphabetically by their keys. -// 3. All integer and Boolean values are stored as 64-bit numbers. -// 4. All floating point values are stored as double precision numbers. -// The properties are mentioned in the flexbuffers docs, but we rely on -// a unit test to catch design changes. 
-class FlexbufferWrapper : public flexbuffers::Vector { - public: - // Construct with a serialized flexbuffer 'buffer' of 'size' bytes - explicit FlexbufferWrapper(const uint8_t* buffer, size_t size); - int64_t ElementAsInt64(size_t i) const; - uint64_t ElementAsUInt64(size_t i) const; - int32_t ElementAsInt32(size_t i) const; - bool ElementAsBool(size_t i) const; - double ElementAsDouble(size_t i) const; - float ElementAsFloat(size_t i) const; -}; - -// Return the number of operators in a subgraph tflite -uint32_t NumSubgraphOperators(const SubGraph* subgraph); -uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx); - -// Converts a flatbuffer array to a TfLiteArray. -// TODO(b/188459715): These function convert a const input to a non-const via a -// const_cast. It is unclear exactly why this is required. -TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray( - const flatbuffers::Vector* flatbuffer_array); -TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray( - const flatbuffers::Vector* flatbuffer_array); - -} // namespace tflite - -#endif // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/activation_utils.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/activation_utils.h deleted file mode 100644 index 95ecc26d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/activation_utils.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_ - -#include -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/kernels/internal/cppmath.h" -#include "tensorflow/lite/kernels/internal/max.h" -#include "tensorflow/lite/kernels/internal/min.h" - -namespace tflite { -namespace ops { -namespace micro { - -// Returns the floating point value for a fused activation: -inline float ActivationValFloat(TfLiteFusedActivation act, float a) { - switch (act) { - case kTfLiteActNone: - return a; - case kTfLiteActRelu: - return TfLiteMax(0.0f, a); - case kTfLiteActReluN1To1: - return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f)); - case kTfLiteActRelu6: - return TfLiteMax(0.0f, TfLiteMin(a, 6.0f)); - case kTfLiteActTanh: - return std::tanh(a); - case kTfLiteActSignBit: - return std::signbit(a); - case kTfLiteActSigmoid: - return 1.0f / (1.0f + std::exp(-a)); - } - return 0.0f; // To indicate an unsupported activation (i.e. when a new fused - // activation is added to the enum and not handled here). -} - -} // namespace micro -} // namespace ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations.cc deleted file mode 100644 index e0b79631..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations.cc +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/kernels/activations.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { -namespace { - -void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(ReluOpData)); -} - -TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - const ReluOpData& data = *(static_cast(node->user_data)); - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kActivationsInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor); - - switch (input->type) { - case kTfLiteFloat32: { - ReluFloat(tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - - return 
kTfLiteOk; - } - case kTfLiteInt8: { - tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - } - default: { - MicroPrintf("Only float32 is supported currently, got %s", - TfLiteTypeGetName(input->type)); - return kTfLiteError; - } - } -} - -void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData)); -} - -TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - const Relu6OpData& data = *(static_cast(node->user_data)); - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kActivationsInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor); - - switch (input->type) { - case kTfLiteFloat32: { - Relu6Float(tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - - return kTfLiteOk; - } - case kTfLiteInt8: { - Relu6Quantized(data.zero_int8, data.six_int8, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - } - default: { - MicroPrintf("Only float32 is supported currently, got %s", - TfLiteTypeGetName(input->type)); - return kTfLiteError; - } - } -} - -} // namespace - -TfLiteRegistration Register_RELU() { - return tflite::micro::RegisterOp(ReluInit, ReluPrepare, ReluEval); -} - -TfLiteRegistration Register_RELU6() { - return tflite::micro::RegisterOp(Relu6Init, Relu6Prepare, Relu6Eval); -} - -} // namespace tflite diff --git 
a/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations.h deleted file mode 100644 index e953f0e0..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -extern const int kActivationsInputTensor; -extern const int kActivationsOutputTensor; - -struct ReluOpData { - ReluParams params; -}; - -struct Relu6OpData { - int8_t six_int8; - int8_t zero_int8; -}; - -void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape, - const RuntimeShape& output_shape, const int8_t* input_data, - int8_t* output_data); - -template -void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output, - ReluOpData* data); - -void ReluFloat(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data); - -void Relu6Float(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* 
output_data); - -void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& output_shape, - int8_t* output_data); - -TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node); - -TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations_common.cc deleted file mode 100644 index 2ec3a1bf..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/activations_common.cc +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/activations.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { - -const int kActivationsInputTensor = 0; -const int kActivationsOutputTensor = 0; - -void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape, - const RuntimeShape& output_shape, const int8_t* input_data, - int8_t* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - const int32_t val = static_cast(input_data[i]); - int32_t clamped = - data.params.output_offset + - MultiplyByQuantizedMultiplier(val - data.params.input_offset, - data.params.output_multiplier, - data.params.output_shift); - clamped = std::max(data.params.quantized_activation_min, clamped); - clamped = std::min(data.params.quantized_activation_max, clamped); - output_data[i] = static_cast(clamped); - } -} - -template -void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output, - ReluOpData* data) { - float act_min = 0.0; - float act_max = std::numeric_limits::infinity(); - double real_multiplier = - static_cast(input->params.scale / output->params.scale); - - const RuntimeShape input_shape = GetTensorShape(input); - const RuntimeShape output_shape = GetTensorShape(output); - - QuantizeMultiplier(real_multiplier, &data->params.output_multiplier, - &data->params.output_shift); - - 
data->params.quantized_activation_min = std::max( - static_cast(std::numeric_limits::min()), - output->params.zero_point + - static_cast(roundf(act_min / output->params.scale))); - data->params.quantized_activation_max = - act_max == std::numeric_limits::infinity() - ? static_cast(std::numeric_limits::max()) - : std::min(static_cast(std::numeric_limits::max()), - output->params.zero_point + - static_cast( - roundf(act_max / output->params.scale))); - data->params.input_offset = input->params.zero_point; - data->params.output_offset = output->params.zero_point; -} - -void ReluFloat(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - const float val = input_data[i]; - const float lower = 0.0f; - const float clamped = val < lower ? lower : val; - output_data[i] = clamped; - } -} - -void Relu6Float(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - const float val = input_data[i]; - const float upper = 6.0f; - const float lower = 0.0f; - const float clamped = val > upper ? upper : val < lower ? lower : val; - output_data[i] = clamped; - } -} - -void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape, - const int8_t* input_data, const RuntimeShape& output_shape, - int8_t* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - const int8_t val = input_data[i]; - const int8_t clamped = val > upper ? upper : val < lower ? 
lower : val; - output_data[i] = clamped; - } -} - -TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - ReluOpData* data = static_cast(node->user_data); - - MicroContext* micro_context = GetMicroContext(context); - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kActivationsInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kActivationsOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - if (input->type == kTfLiteInt8) { - CalculateReluOpData(input, output, data); - } - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - Relu6OpData* data = static_cast(node->user_data); - - MicroContext* micro_context = GetMicroContext(context); - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kActivationsInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - - if (input->type == kTfLiteInt8) { - data->six_int8 = FloatToQuantizedType(6.0f, input->params.scale, - input->params.zero_point); - data->zero_int8 = input->params.zero_point; - } - - micro_context->DeallocateTempTfLiteTensor(input); - - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/add.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/add.cc deleted file mode 100644 index f75db4e5..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/add.cc +++ /dev/null @@ -1,165 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/add.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/add.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, - const OpDataAdd* data, const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params; - SetActivationParams(data->output_activation_min_f32, - data->output_activation_max_f32, &op_params); - if (data->requires_broadcast) { - reference_ops::BroadcastAdd4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), 
- tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } -} - -TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteAddParams* params, const OpDataAdd* data, - const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, - TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params; - op_params.left_shift = data->left_shift; - op_params.input1_offset = data->input1_offset; - op_params.input1_multiplier = data->input1_multiplier; - op_params.input1_shift = data->input1_shift; - op_params.input2_offset = data->input2_offset; - op_params.input2_multiplier = data->input2_multiplier; - op_params.input2_shift = data->input2_shift; - op_params.output_offset = data->output_offset; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; - SetActivationParams(data->output_activation_min, data->output_activation_max, - &op_params); - bool need_broadcast = reference_ops::ProcessBroadcastShapes( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorShape(input2), &op_params); - - switch (output->type) { - case kTfLiteInt8: { - if (need_broadcast) { - reference_integer_ops::BroadcastAdd4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - reference_integer_ops::Add( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } - break; - } - case kTfLiteInt16: { - if (need_broadcast) { - reference_ops::BroadcastAdd4DSlow( - 
op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output), - false); - } - break; - } - default: - MicroPrintf("Type %s (%d) not supported.", - TfLiteTypeGetName(output->type), output->type); - return kTfLiteError; - } - - return kTfLiteOk; -} - -void* AddInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd)); -} - -TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->builtin_data); - - TFLITE_DCHECK(node->user_data != nullptr); - const OpDataAdd* data = static_cast(node->user_data); - - const TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, kAddInputTensor1); - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, kAddInputTensor2); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kAddOutputTensor); - - if (output->type == kTfLiteFloat32) { - EvalAdd(context, node, params, data, input1, input2, output); - } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { - TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data, - input1, input2, output)); - } else { - MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type), - output->type); - return kTfLiteError; - } - - return kTfLiteOk; -} - -TfLiteRegistration Register_ADD() { - return tflite::micro::RegisterOp(AddInit, AddPrepare, 
AddEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/add_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/add_common.cc deleted file mode 100644 index b285b800..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/add_common.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/add.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/add.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" - -namespace tflite { - -const int kAddInputTensor1 = 0; -const int kAddInputTensor2 = 1; -const int kAddOutputTensor = 0; - -TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - 
TfLiteTensor* output, OpDataAdd* data) { - data->requires_broadcast = !HaveSameShapes(input1, input2); - - if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { - // 8bit -> 8bit general quantized path, with general rescalings - data->input1_offset = -input1->params.zero_point; - data->input2_offset = -input2->params.zero_point; - data->output_offset = output->params.zero_point; - data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20; - const double twice_max_input_scale = - 2 * static_cast( - std::max(input1->params.scale, input2->params.scale)); - const double real_input1_multiplier = - static_cast(input1->params.scale) / twice_max_input_scale; - const double real_input2_multiplier = - static_cast(input2->params.scale) / twice_max_input_scale; - const double real_output_multiplier = - twice_max_input_scale / - ((1 << data->left_shift) * static_cast(output->params.scale)); - - QuantizeMultiplierSmallerThanOneExp( - real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); - - QuantizeMultiplierSmallerThanOneExp( - real_input2_multiplier, &data->input2_multiplier, &data->input2_shift); - - QuantizeMultiplierSmallerThanOneExp( - real_output_multiplier, &data->output_multiplier, &data->output_shift); - - TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( - context, params->activation, output, &data->output_activation_min, - &data->output_activation_max)); - } else if (output->type == kTfLiteFloat32) { - CalculateActivationRange(params->activation, - &data->output_activation_min_f32, - &data->output_activation_max_f32); - } - - return kTfLiteOk; -} - -TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - TFLITE_DCHECK(node->builtin_data != nullptr); - - MicroContext* micro_context = GetMicroContext(context); - TfLiteTensor* input1 = - micro_context->AllocateTempInputTensor(node, kAddInputTensor1); - TF_LITE_ENSURE(context, input1 != nullptr); - TfLiteTensor* input2 = - 
micro_context->AllocateTempInputTensor(node, kAddInputTensor2); - TF_LITE_ENSURE(context, input2 != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kAddOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - OpDataAdd* data = static_cast(node->user_data); - auto* params = reinterpret_cast(node->builtin_data); - - TF_LITE_ENSURE_STATUS( - CalculateOpDataAdd(context, params, input1, input2, output, data)); - - micro_context->DeallocateTempTfLiteTensor(input1); - micro_context->DeallocateTempTfLiteTensor(input2); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/assign_variable.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/assign_variable.cc deleted file mode 100644 index a770d0aa..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/assign_variable.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_graph.h" -#include "tensorflow/lite/micro/micro_resource_variable.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -namespace { - -constexpr int kInputVariableId = 0; -constexpr int kInputValue = 1; - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0); - - // This must be a TfLiteEvalTensor despite this being in Prepare, because - // CreateTensor allocates a temp tensor from the flatbuffer, which does not - // contain the correct ID generated within the VAR_HANDLE op. EvalTensors are - // all allocated during StartModelAllocation which happens before - // init/prepare, and VAR_HANDLE Prepare() references its own op_data in the - // TfLiteEvalTensor, so reading the ID here is valid. 
- const TfLiteEvalTensor* input_resource_id_tensor = - tflite::micro::GetEvalInput(context, node, kInputVariableId); - TFLITE_DCHECK(input_resource_id_tensor != nullptr); - TF_LITE_ENSURE(context, (input_resource_id_tensor->type == kTfLiteResource || - input_resource_id_tensor->type == kTfLiteInt32)); - TF_LITE_ENSURE_EQ(context, NumElements(input_resource_id_tensor->dims), 1); - - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - TfLiteTensor* input_value = - micro_context->AllocateTempInputTensor(node, kInputValue); - TFLITE_DCHECK(input_value != nullptr); - - MicroGraph& graph_info = micro_context->graph(); - - MicroResourceVariables* resources = graph_info.GetResourceVariables(); - TF_LITE_ENSURE_OK(context, - resources->Allocate(input_resource_id_tensor->data.i32[0], - context, input_value)); - - micro_context->DeallocateTempTfLiteTensor(input_value); - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input_id = - tflite::micro::GetEvalInput(context, node, kInputVariableId); - TFLITE_DCHECK(input_id != nullptr); - - const TfLiteEvalTensor* input_value = - tflite::micro::GetEvalInput(context, node, kInputValue); - TFLITE_DCHECK(input_value != nullptr); - - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - MicroGraph& graph_info = micro_context->graph(); - - MicroResourceVariables* resources = graph_info.GetResourceVariables(); - if (resources == nullptr) { - MicroPrintf( - "ASSIGN_VARIABLE requires resource variables. Please create " - "ResourceVariables and pass it to the interpreter."); - return kTfLiteError; - } - TF_LITE_ENSURE_OK(context, - resources->Assign(input_id->data.i32[0], input_value)); - return kTfLiteOk; -} - -} // namespace. 
- -TfLiteRegistration Register_ASSIGN_VARIABLE() { - return tflite::micro::RegisterOp(nullptr, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/broadcast_args.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/broadcast_args.cc deleted file mode 100644 index be2672ec..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/broadcast_args.cc +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/kernels/internal/reference/broadcast_args.h" - -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_context.h" - -namespace tflite { -namespace { -constexpr int kShape1Tensor = 0; -constexpr int kShape2Tensor = 1; -constexpr int kOutputTensor = 0; - -TfLiteStatus BroadcastArgsPrepare(TfLiteContext* context, TfLiteNode* node) { - TF_LITE_ENSURE(context, NumInputs(node) == 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - MicroContext* micro_context = GetMicroContext(context); - TfLiteTensor* shape1 = - micro_context->AllocateTempInputTensor(node, kShape1Tensor); - TfLiteTensor* shape2 = - micro_context->AllocateTempInputTensor(node, kShape2Tensor); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); - - TF_LITE_ENSURE(context, - shape1->type == kTfLiteInt32 || shape1->type == kTfLiteInt64); - TF_LITE_ENSURE_EQ(context, shape1->type, shape2->type); - TF_LITE_ENSURE_EQ(context, shape1->type, output->type); - - // Ensures the shapes are 1D tensor. 
- TF_LITE_ENSURE_EQ(context, NumDimensions(shape1), 1); - TF_LITE_ENSURE_EQ(context, NumDimensions(shape2), 1); - - // Ensure the shape of the output tensor is compatible - TF_LITE_ENSURE_EQ(context, NumDimensions(output), 1); - - micro_context->DeallocateTempTfLiteTensor(shape1); - micro_context->DeallocateTempTfLiteTensor(shape2); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -TfLiteStatus BroadcastArgsEval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* shape1 = - micro::GetEvalInput(context, node, kShape1Tensor); - const TfLiteEvalTensor* shape2 = - micro::GetEvalInput(context, node, kShape2Tensor); - TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor); - - if (output->type == kTfLiteInt32) { - reference_ops::BroadcastArgs( - micro::GetTensorShape(shape1), micro::GetTensorData(shape1), - micro::GetTensorShape(shape2), micro::GetTensorData(shape2), - micro::GetTensorShape(output), micro::GetTensorData(output)); - } else { - reference_ops::BroadcastArgs( - micro::GetTensorShape(shape1), micro::GetTensorData(shape1), - micro::GetTensorShape(shape2), micro::GetTensorData(shape2), - micro::GetTensorShape(output), micro::GetTensorData(output)); - } - - return kTfLiteOk; -} - -} // namespace - -TfLiteRegistration Register_BROADCAST_ARGS() { - return tflite::micro::RegisterOp(nullptr, BroadcastArgsPrepare, - BroadcastArgsEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/broadcast_to.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/broadcast_to.cc deleted file mode 100644 index 63a14db2..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/broadcast_to.cc +++ /dev/null @@ -1,123 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h" - -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_context.h" - -namespace tflite { - -namespace { -constexpr int kInputTensor = 0; -constexpr int kShapeTensor = 1; -constexpr int kOutputTensor = 0; -// Support a maximum of 5 dimensions in TFLM. -constexpr int kMaxDims = 5; - -TfLiteStatus ValidateOutputTensor(TfLiteContext* context, TfLiteTensor* input, - TfLiteTensor* shape, TfLiteTensor* output) { - // Ensures the shape is 1D tensor. - TF_LITE_ENSURE_EQ(context, NumDimensions(shape), 1); - - // Ensure output dims is not less than input dims. - int input_num_dims = NumDimensions(input); - int output_num_dims = NumDimensions(output); - int shape_num_dims = SizeOfDimension(shape, 0); - TF_LITE_ENSURE_MSG(context, output_num_dims == shape_num_dims, - "Output must match with the expected shape dimension."); - TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims, - "Output shape must be broadcastable from input shape."); - TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims, - "BroadcastTo only supports 1-5D tensor."); - - // Check if output shape is broadcastable from input shape. 
- auto get_shape_data = [shape](int i) -> int32_t { - if (shape->type == kTfLiteInt32) { - return GetTensorData(shape)[i]; - } else { - return GetTensorData(shape)[i]; - } - }; - - int extending_dims = output_num_dims - input_num_dims; - for (int idx = 0; idx < input_num_dims; ++idx) { - TF_LITE_ENSURE_MSG( - context, - (SizeOfDimension(input, idx) == 1 || - SizeOfDimension(input, idx) == get_shape_data(extending_dims + idx)), - "Output shape must be broadcastable from input shape."); - } - - // Validating the shape of the output tensor. - tflite::RuntimeShape output_shape = tflite::GetTensorShape(output); - for (int idx = 0; idx < output_num_dims; ++idx) { - TF_LITE_ENSURE(context, output_shape.Dims(idx) == get_shape_data(idx)); - } - return kTfLiteOk; -} - -TfLiteStatus BroadcastToPrepare(TfLiteContext* context, TfLiteNode* node) { - TF_LITE_ENSURE(context, NumInputs(node) == 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - MicroContext* micro_context = GetMicroContext(context); - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); - TfLiteTensor* shape = - micro_context->AllocateTempInputTensor(node, kShapeTensor); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); - - TF_LITE_ENSURE_MSG(context, (NumDimensions(input) <= kMaxDims), - "BroadcastTo only supports 1-5D tensor."); - - TF_LITE_ENSURE(context, - shape->type == kTfLiteInt32 || shape->type == kTfLiteInt64); - TF_LITE_ENSURE_EQ(context, input->type, output->type); - - // Does not support String type due to its variable size. This limitation is - // the same as TFLite. 
- TF_LITE_ENSURE(context, input->type != kTfLiteString); - - TF_LITE_ENSURE_STATUS(ValidateOutputTensor(context, input, shape, output)); - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(shape); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -TfLiteStatus BroadcastToEval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - micro::GetEvalInput(context, node, kInputTensor); - TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor); - - // BroadcastTo op support upto 5 dims, different from 8 dims in TFLite. - reference_ops::BroadcastTo( - micro::GetTensorShape(input), input->data.raw, - micro::GetTensorShape(output), output->data.raw, input->type); - return kTfLiteOk; -} -} // namespace - -TfLiteRegistration Register_BROADCAST_TO() { - return tflite::micro::RegisterOp(nullptr, BroadcastToPrepare, - BroadcastToEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/call_once.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/call_once.cc deleted file mode 100644 index 200242b2..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/call_once.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_context.h" -#include "tensorflow/lite/micro/micro_graph.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -namespace { - -struct OpData { - int init_subgraph_index; - bool has_run; -}; - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpData)); -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - OpData* op_data = reinterpret_cast(node->user_data); - const auto* params = - reinterpret_cast(node->builtin_data); - op_data->init_subgraph_index = params->init_subgraph_index; - op_data->has_run = false; - - TF_LITE_ENSURE(context, NumInputs(node) == 0); - TF_LITE_ENSURE(context, NumOutputs(node) == 0); - - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - MicroGraph& graph_info = micro_context->graph(); - - TF_LITE_ENSURE(context, - op_data->init_subgraph_index < graph_info.NumSubgraphs()); - - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - OpData* op_data = reinterpret_cast(node->user_data); - - // Call once only runs one time then is a no-op for every subsequent call. 
- if (op_data->has_run) { - return kTfLiteOk; - } - - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - MicroGraph& graph_info = micro_context->graph(); - - TF_LITE_ENSURE_OK(context, - graph_info.InvokeSubgraph(op_data->init_subgraph_index)); - - op_data->has_run = true; - - return kTfLiteOk; -} - -} // namespace. - -TfLiteRegistration Register_CALL_ONCE() { - return tflite::micro::RegisterOp(Init, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/cast.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/cast.cc deleted file mode 100644 index a1f4516b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/cast.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { -namespace { - -constexpr int kInputTensor = 0; -constexpr int kOutputTensor = 0; - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -template -void copyCast(const FromT* in, ToT* out, int num_elements) { - std::transform(in, in + num_elements, out, - [](FromT a) { return static_cast(a); }); -} - -template -TfLiteStatus copyToTensor(TfLiteContext* context, const FromT* in, - TfLiteEvalTensor* out, int num_elements) { - switch (out->type) { - case kTfLiteInt8: - copyCast(in, out->data.int8, num_elements); - break; - case kTfLiteInt16: - copyCast(in, out->data.i16, num_elements); - break; - case kTfLiteInt32: - copyCast(in, out->data.i32, num_elements); - break; - case kTfLiteFloat32: - copyCast(in, tflite::micro::GetTensorData(out), num_elements); - break; - default: - // Unsupported type. 
- MicroPrintf("Output type %s (%d) not supported.", - TfLiteTypeGetName(out->type), out->type); - } - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); - int num_elements = MatchingFlatSize(tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorShape(output)); - - switch (input->type) { - case kTfLiteInt8: - return copyToTensor(context, input->data.int8, output, num_elements); - case kTfLiteInt16: - return copyToTensor(context, tflite::micro::GetTensorData(input), - output, num_elements); - case kTfLiteInt32: - return copyToTensor(context, tflite::micro::GetTensorData(input), - output, num_elements); - case kTfLiteUInt32: - return copyToTensor(context, - tflite::micro::GetTensorData(input), output, - num_elements); - case kTfLiteFloat32: - return copyToTensor(context, tflite::micro::GetTensorData(input), - output, num_elements); - default: - // Unsupported type. - MicroPrintf("Input type %s (%d) not supported.", - TfLiteTypeGetName(input->type), input->type); - } - return kTfLiteOk; -} -} // namespace - -TfLiteRegistration Register_CAST() { - return tflite::micro::RegisterOp(nullptr, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/ceil.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/ceil.cc deleted file mode 100644 index a390a735..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/ceil.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/ceil.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { -namespace ops { -namespace micro { -namespace ceil { - -constexpr int kInputTensor = 0; -constexpr int kOutputTensor = 0; - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); - TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type); - TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes); - TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size); - for (int i = 0; i < output->dims->size; ++i) { - TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]); - } - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* 
input = - tflite::micro::GetEvalInput(context, node, kInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); - - reference_ops::Ceil(tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - - return kTfLiteOk; -} -} // namespace ceil - -TfLiteRegistration Register_CEIL() { - return tflite::micro::RegisterOp(nullptr, ceil::Prepare, ceil::Eval); -} - -} // namespace micro -} // namespace ops -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer.h deleted file mode 100644 index 51adf746..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -// The CircularBuffer op has one input and one output tensor. 
-extern const int kCircularBufferInputTensor; -extern const int kCircularBufferOutputTensor; - -// Indices into the init flexbuffer's vector. -// The parameter's name is in the comment that follows. -// Elements in the vectors are ordered alphabetically by parameter name. -extern const int kCircularBufferCyclesMaxIndex; // 'cycles_max' - -// TODO(b/149795762): Add this to TfLiteStatus enum. -extern const TfLiteStatus kTfLiteAbort; - -// These fields control the stride period of a strided streaming model. This op -// returns kTfLiteAbort until cycles_until_run-- is zero. At this time, -// cycles_until_run is reset to cycles_max. -struct OpDataCircularBuffer { - int cycles_until_run; - int cycles_max; -}; - -TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer_common.cc deleted file mode 100644 index 81db6e65..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer_common.cc +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/flatbuffer_utils.h" -#include "tensorflow/lite/micro/kernels/circular_buffer.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { - -// The CircularBuffer op has one input and one output tensor. -const int kCircularBufferInputTensor = 0; -const int kCircularBufferOutputTensor = 0; - -// Indices into the init flexbuffer's vector. -// The parameter's name is in the comment that follows. -// Elements in the vectors are ordered alphabetically by parameter name. -const int kCircularBufferCyclesMaxIndex = 0; // 'cycles_max' - -// TODO(b/149795762): Add this to TfLiteStatus enum. 
-const TfLiteStatus kTfLiteAbort = static_cast(-9); - -TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kCircularBufferInputTensor); - TfLiteTensor* output = micro_context->AllocateTempOutputTensor( - node, kCircularBufferOutputTensor); - - TFLITE_DCHECK(node->user_data != nullptr); - OpDataCircularBuffer* op_data = - static_cast(node->user_data); - - TF_LITE_ENSURE(context, input != nullptr); - TF_LITE_ENSURE(context, output != nullptr); - TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]); - TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]); - TF_LITE_ENSURE_EQ(context, input->dims->data[2], output->dims->data[2]); - TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]); - - TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); - - // The circular buffer custom operator currently only supports int8. - TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8); - - if (op_data->cycles_max <= 0) { - // The last circular buffer layer simply accumulates outputs, and does not - // run periodically. - // TODO(b/150001379): Move this special case logic to the tflite flatbuffer. - static int cb_prepare_count = 0; - cb_prepare_count++; - // These checks specifically work for the only two streaming models - // supported on TFLM. They use the shape of the output tensor along with the - // layer number to determine if the circular buffer period should be 1 or 2. 
- - // These models are outlined int the following documents: - // https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing - // https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing - if (output->dims->data[1] == 5 || output->dims->data[1] == 13 || - output->dims->data[1] == 25 || - (cb_prepare_count == 5 && output->dims->data[2] == 2 && - output->dims->data[3] == 96)) { - op_data->cycles_max = 1; - cb_prepare_count = 0; - } else { - op_data->cycles_max = 2; - } - } - op_data->cycles_until_run = op_data->cycles_max; - node->user_data = op_data; - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h deleted file mode 100644 index 2fbf4fe9..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H -#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H - -extern const int g_gen_data_size_circular_buffer_config; -extern const unsigned char g_gen_data_circular_buffer_config[]; - -#endif diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv.cc deleted file mode 100644 index 87ea92e6..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/kernels/conv.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/conv.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { -namespace { - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataConv)); -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kConvInputTensor); - const TfLiteEvalTensor* filter = - tflite::micro::GetEvalInput(context, node, kConvWeightsTensor); - const TfLiteEvalTensor* bias = - (NumInputs(node) == 3) - ? 
tflite::micro::GetEvalInput(context, node, kConvBiasTensor) - : nullptr; - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kConvOutputTensor); - - TFLITE_DCHECK(node->builtin_data != nullptr); - const auto& params = - *(reinterpret_cast(node->builtin_data)); - TFLITE_DCHECK(node->user_data != nullptr); - const auto& data = *(static_cast(node->user_data)); - - TF_LITE_ENSURE_EQ(context, input->type, output->type); - TF_LITE_ENSURE_MSG( - context, - input->type == filter->type || - (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8), - "Hybrid models are not supported on TFLite Micro."); - - switch (input->type) { // Already know in/out types are same. - case kTfLiteFloat32: { - tflite::reference_ops::Conv( - ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - tflite::micro::GetTensorShape(bias), - tflite::micro::GetOptionalTensorData(bias), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output), - tflite::micro::GetTensorShape(nullptr), nullptr); - break; - } - case kTfLiteInt16: { - switch (bias->type) { - case kTfLiteInt32: { - reference_integer_ops::ConvPerChannel( - ConvParamsQuantized(params, data), - data.per_channel_output_multiplier, data.per_channel_output_shift, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - tflite::micro::GetTensorShape(bias), - tflite::micro::GetOptionalTensorData(bias), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - break; - } - case kTfLiteInt64: { - reference_integer_ops::ConvPerChannel( - ConvParamsQuantized(params, data), - data.per_channel_output_multiplier, data.per_channel_output_shift, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - 
tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - tflite::micro::GetTensorShape(bias), - tflite::micro::GetOptionalTensorData(bias), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - break; - } - default: - MicroPrintf("Bias type %s (%d) not supported.", - TfLiteTypeGetName(bias->type), bias->type); - return kTfLiteError; - } - break; - } - case kTfLiteInt8: { - reference_integer_ops::ConvPerChannel( - ConvParamsQuantized(params, data), data.per_channel_output_multiplier, - data.per_channel_output_shift, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - tflite::micro::GetTensorShape(bias), - tflite::micro::GetOptionalTensorData(bias), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - break; - } - default: - MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), - input->type); - return kTfLiteError; - } - return kTfLiteOk; -} - -} // namespace - -TfLiteRegistration Register_CONV_2D() { - return tflite::micro::RegisterOp(Init, ConvPrepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv.h deleted file mode 100644 index 06b35e1e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv.h +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -struct OpDataConv { - TfLitePaddingValues padding; - - // Cached tensor zero point values for quantized operations. - int32_t input_zero_point; - int32_t filter_zero_point; - int32_t output_zero_point; - - // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. - int32_t output_multiplier; - int output_shift; - - // Per channel output multiplier and shift. - int32_t* per_channel_output_multiplier; - int32_t* per_channel_output_shift; - - // The range of the fused activation layer. For example for kNone and - // uint8_t these would be 0 and 255. - int32_t output_activation_min; - int32_t output_activation_max; -}; - -extern const int kConvInputTensor; -extern const int kConvWeightsTensor; -extern const int kConvBiasTensor; -extern const int kConvOutputTensor; -extern const int kConvQuantizedDimension; - -// Returns a ConvParams struct with all the parameters needed for a -// float computation. -ConvParams ConvParamsFloat(const TfLiteConvParams& params, - const OpDataConv& data); - -// Returns a ConvParams struct with all the parameters needed for a -// quantized computation. 
-ConvParams ConvParamsQuantized(const TfLiteConvParams& params, - const OpDataConv& data); - -TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node, - const TfLiteConvParams& params, int width, - int height, int filter_width, - int filter_height, int out_width, - int out_height, const TfLiteType data_type, - OpDataConv* data); - -TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node); - -// This is the most generic TfLiteRegistration. The actual supported types may -// still be target dependent. The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_CONV_2D(); - -#if defined(XTENSA) -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int8 activations and int8 weights and always calls the reference -// implementation. -TfLiteRegistration Register_CONV_2D_INT8REF(); -#else -inline TfLiteRegistration Register_CONV_2D_INT8REF() { - return Register_CONV_2D(); -} -#endif - -#if defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int8 activations and int8 weights and uses the latency optimized -// implementations. -TfLiteRegistration Register_CONV_2D_INT8(); - -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int16 activations and int8 weights and uses the latency optimized -// implementations. 
-TfLiteRegistration Register_CONV_2D_INT16(); - -#else -inline TfLiteRegistration Register_CONV_2D_INT8() { return Register_CONV_2D(); } - -inline TfLiteRegistration Register_CONV_2D_INT16() { - return Register_CONV_2D(); -} -#endif - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv_common.cc deleted file mode 100644 index 7115f7ba..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv_common.cc +++ /dev/null @@ -1,197 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/conv.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/conv.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { - -const int kConvInputTensor = 0; -const int kConvWeightsTensor = 1; -const int kConvBiasTensor = 2; -const int kConvOutputTensor = 0; - -// Conv is quantized along dimension 0: -// https://www.tensorflow.org/lite/performance/quantization_spec -const int kConvQuantizedDimension = 0; - -// Returns a ConvParams struct with all the parameters needed for a -// float computation. -ConvParams ConvParamsFloat(const TfLiteConvParams& params, - const OpDataConv& data) { - ConvParams op_params; - CalculateActivationRange(params.activation, &op_params.float_activation_min, - &op_params.float_activation_max); - op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding); - op_params.padding_values.width = data.padding.width; - op_params.padding_values.height = data.padding.height; - op_params.stride_width = params.stride_width; - op_params.stride_height = params.stride_height; - op_params.dilation_width_factor = params.dilation_width_factor; - op_params.dilation_height_factor = params.dilation_height_factor; - return op_params; -} - -// Returns a ConvParams struct with all the parameters needed for a -// quantized computation. 
-ConvParams ConvParamsQuantized(const TfLiteConvParams& params, - const OpDataConv& data) { - ConvParams op_params; - op_params.input_offset = -data.input_zero_point; - op_params.weights_offset = -data.filter_zero_point; - op_params.output_offset = data.output_zero_point; - op_params.output_multiplier = data.output_multiplier; - op_params.output_shift = -data.output_shift; - op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding); - op_params.padding_values.height = data.padding.height; - op_params.padding_values.width = data.padding.width; - op_params.stride_height = params.stride_height; - op_params.stride_width = params.stride_width; - op_params.dilation_height_factor = params.dilation_height_factor; - op_params.dilation_width_factor = params.dilation_width_factor; - op_params.quantized_activation_min = data.output_activation_min; - op_params.quantized_activation_max = data.output_activation_max; - return op_params; -} - -TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node, - const TfLiteConvParams& params, int width, - int height, int filter_width, - int filter_height, int out_width, - int out_height, const TfLiteType data_type, - OpDataConv* data) { - bool has_bias = node->inputs->size == 3; - // Check number of inputs/outputs - TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); - TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); - - // Matching GetWindowedOutputSize in TensorFlow. 
- auto padding = params.padding; - data->padding = ComputePaddingHeightWidth( - params.stride_height, params.stride_width, params.dilation_height_factor, - params.dilation_width_factor, height, width, filter_height, filter_width, - padding, &out_height, &out_width); - - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kConvInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* filter = - micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); - TF_LITE_ENSURE(context, filter != nullptr); - TfLiteTensor* bias = - micro_context->AllocateTempInputTensor(node, kConvBiasTensor); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - // Note that quantized inference requires that all tensors have their - // parameters set. This is usually done during quantized training. - if (data_type != kTfLiteFloat32) { - int output_channels = filter->dims->data[kConvQuantizedDimension]; - - TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( - context, input, filter, bias, output, params.activation, - &data->output_multiplier, &data->output_shift, - &data->output_activation_min, &data->output_activation_max, - data->per_channel_output_multiplier, data->per_channel_output_shift, - output_channels)); - } - - data->input_zero_point = input->params.zero_point; - data->filter_zero_point = filter->params.zero_point; - data->output_zero_point = output->params.zero_point; - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(filter); - micro_context->DeallocateTempTfLiteTensor(output); - micro_context->DeallocateTempTfLiteTensor(bias); - - return kTfLiteOk; -} - -TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - TFLITE_DCHECK(node->builtin_data != nullptr); - - OpDataConv* 
data = static_cast(node->user_data); - const auto& params = - *(static_cast(node->builtin_data)); - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kConvInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* filter = - micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); - TF_LITE_ENSURE(context, filter != nullptr); - - const int input_width = input->dims->data[2]; - const int input_height = input->dims->data[1]; - const int filter_width = filter->dims->data[2]; - const int filter_height = filter->dims->data[1]; - const int output_width = output->dims->data[2]; - const int output_height = output->dims->data[1]; - - // Dynamically allocate per-channel quantization parameters. - const int num_channels = filter->dims->data[kConvQuantizedDimension]; - data->per_channel_output_multiplier = - static_cast(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t))); - data->per_channel_output_shift = - static_cast(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t))); - - // All per-channel quantized tensors need valid zero point and scale arrays. 
- if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { - TF_LITE_ENSURE_EQ(context, filter->quantization.type, - kTfLiteAffineQuantization); - - const auto* affine_quantization = - static_cast(filter->quantization.params); - TFLITE_DCHECK(affine_quantization != nullptr); - TFLITE_DCHECK(affine_quantization->scale != nullptr); - TFLITE_DCHECK(affine_quantization->zero_point != nullptr); - - TF_LITE_ENSURE(context, - affine_quantization->scale->size == 1 || - affine_quantization->scale->size == - filter->dims->data[kConvQuantizedDimension]); - } - - TF_LITE_ENSURE_STATUS(CalculateOpDataConv( - context, node, params, input_width, input_height, filter_width, - filter_height, output_width, output_height, input->type, data)); - - micro_context->DeallocateTempTfLiteTensor(filter); - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv_test.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv_test.h deleted file mode 100644 index 47ba8ac4..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/conv_test.h +++ /dev/null @@ -1,113 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/kernels/kernel_runner.h" -#include "tensorflow/lite/micro/kernels/micro_ops.h" -#include "tensorflow/lite/micro/test_helpers.h" -#include "tensorflow/lite/micro/testing/micro_test.h" - -namespace tflite { -namespace testing { - -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, float* output_data); - -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int8_t* output_data); - -TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size, - int output_length, TfLiteConvParams* conv_params, - TfLiteRegistration registration, uint8_t* output_data); - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const float* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TfLiteRegistration registration, - float* output_data, float tolerance = 1e-5); - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const int8_t* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TfLiteRegistration registration, - int8_t* output_data, float tolerance = 1e-5); - -TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, - const uint8_t* expected_output_data, - int output_length, - TfLiteConvParams* conv_params, - TfLiteRegistration registration, - uint8_t* output_data, float tolerance = 1e-5); - -TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data, - int* filter_dims_data, const float* filter_data, - int* bias_dims_data, const float* bias_data, - 
int* output_dims_data, - const float* expected_output_data, - TfLiteConvParams* conv_params, - TfLiteRegistration registration, float* output_data); - -TfLiteStatus TestConvQuantizedPerLayer( - int* input_dims_data, const float* input_data, uint8_t* input_quantized, - float input_scale, int* filter_dims_data, const float* filter_data, - uint8_t* filter_quantized, float filter_scale, int* bias_dims_data, - const float* bias_data, int32_t* bias_quantized, int* output_dims_data, - const float* expected_output_data, uint8_t* expected_output_quantized, - float output_scale, TfLiteConvParams* conv_params, - TfLiteRegistration registration, uint8_t* output_data); - -TfLiteStatus TestConvQuantizedPerChannel( - int* input_dims_data, const float* input_data, int8_t* input_quantized, - float input_scale, int input_zero_point, int* filter_dims_data, - const float* filter_data, int8_t* filter_data_quantized, - int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized, - float* bias_scales, int* bias_zero_points, int* output_dims_data, - const float* expected_output_data, int8_t* expected_output_data_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int8_t* output_data); - -TfLiteStatus TestConvQuantizedPerChannel( - int* input_dims_data, const float* input_data, int16_t* input_quantized, - float input_scale, int input_zero_point, int* filter_dims_data, - const float* filter_data, int8_t* filter_data_quantized, - int* bias_dims_data, const float* bias_data, - std::int64_t* bias_data_quantized, float* bias_scales, - int* bias_zero_points, int* output_dims_data, - const float* expected_output_data, int16_t* expected_output_data_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int16_t* output_data); - -TfLiteStatus TestConvQuantizedPerChannel( - int* input_dims_data, const float* input_data, int16_t* input_quantized, - 
float input_scale, int input_zero_point, int* filter_dims_data, - const float* filter_data, int8_t* filter_data_quantized, - int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized, - float* bias_scales, int* bias_zero_points, int* output_dims_data, - const float* expected_output_data, int16_t* expected_output_data_quantized, - float output_scale, int output_zero_point, TfLiteConvParams* conv_params, - TfLiteRegistration registration, int16_t* output_data); - -} // namespace testing -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/depthwise_conv.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/depthwise_conv.h deleted file mode 100644 index 562438d7..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/depthwise_conv.h +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/micro/kernels/conv.h" - -namespace tflite { - -extern const int kDepthwiseConvInputTensor; -extern const int kDepthwiseConvWeightsTensor; -extern const int kDepthwiseConvBiasTensor; -extern const int kDepthwiseConvOutputTensor; -extern const int kDepthwiseConvQuantizedDimension; - -// Returns a DepthwiseParams struct with all the parameters needed for a -// float computation. -DepthwiseParams DepthwiseConvParamsFloat( - const TfLiteDepthwiseConvParams& params, const OpDataConv& data); - -// Returns a DepthwiseParams struct with all the parameters needed for a -// quantized computation. -DepthwiseParams DepthwiseConvParamsQuantized( - const TfLiteDepthwiseConvParams& params, const OpDataConv& data); - -TfLiteStatus CalculateOpDataDepthwiseConv( - TfLiteContext* context, TfLiteNode* node, - const TfLiteDepthwiseConvParams& params, int width, int height, - int filter_width, int filter_height, int out_width, int out_height, - const TfLiteType data_type, OpDataConv* data); - -TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node); - -// This is the most generic TfLiteRegistration. The actual supported types may -// still be target dependent. The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_DEPTHWISE_CONV_2D(); - -#if defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int8 activations and int8 weights and uses the latency optimized -// implementations. 
-TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8(); - -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int16 activations and int8 weights and uses the latency optimized -// implementations. -TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16(); - -#else -inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8() { - return Register_DEPTHWISE_CONV_2D(); -} - -inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16() { - return Register_DEPTHWISE_CONV_2D(); -} -#endif - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/depthwise_conv_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/depthwise_conv_common.cc deleted file mode 100644 index 3bf07274..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/depthwise_conv_common.cc +++ /dev/null @@ -1,202 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/depthwise_conv.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { - -const int kDepthwiseConvInputTensor = 0; -const int kDepthwiseConvWeightsTensor = 1; -const int kDepthwiseConvBiasTensor = 2; -const int kDepthwiseConvOutputTensor = 0; - -// DepthwiseConv is quantized along dimension 3: -// https://www.tensorflow.org/lite/performance/quantization_spec -const int kDepthwiseConvQuantizedDimension = 3; - -// Returns a DepthwiseParams struct with all the parameters needed for a -// float computation. 
-DepthwiseParams DepthwiseConvParamsFloat( - const TfLiteDepthwiseConvParams& params, const OpDataConv& data) { - DepthwiseParams op_params; - CalculateActivationRange(params.activation, &op_params.float_activation_min, - &op_params.float_activation_max); - op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding); - op_params.padding_values.width = data.padding.width; - op_params.padding_values.height = data.padding.height; - op_params.stride_width = params.stride_width; - op_params.stride_height = params.stride_height; - op_params.dilation_width_factor = params.dilation_width_factor; - op_params.dilation_height_factor = params.dilation_height_factor; - op_params.depth_multiplier = params.depth_multiplier; - return op_params; -} - -// Returns a DepthwiseParams struct with all the parameters needed for a -// quantized computation. -DepthwiseParams DepthwiseConvParamsQuantized( - const TfLiteDepthwiseConvParams& params, const OpDataConv& data) { - DepthwiseParams op_params; - op_params.input_offset = -data.input_zero_point; - op_params.weights_offset = -data.filter_zero_point; - op_params.output_offset = data.output_zero_point; - op_params.output_multiplier = data.output_multiplier; - op_params.output_shift = -data.output_shift; - op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding); - op_params.padding_values.height = data.padding.height; - op_params.padding_values.width = data.padding.width; - op_params.stride_height = params.stride_height; - op_params.stride_width = params.stride_width; - op_params.dilation_height_factor = params.dilation_height_factor; - op_params.dilation_width_factor = params.dilation_width_factor; - op_params.depth_multiplier = params.depth_multiplier; - op_params.quantized_activation_min = data.output_activation_min; - op_params.quantized_activation_max = data.output_activation_max; - return op_params; -} - -TfLiteStatus CalculateOpDataDepthwiseConv( - TfLiteContext* context, TfLiteNode* node, - const 
TfLiteDepthwiseConvParams& params, int width, int height, - int filter_width, int filter_height, int out_width, int out_height, - const TfLiteType data_type, OpDataConv* data) { - bool has_bias = node->inputs->size == 3; - // Check number of inputs/outputs - TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); - TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); - - // Matching GetWindowedOutputSize in TensorFlow. - auto padding = params.padding; - data->padding = ComputePaddingHeightWidth( - params.stride_height, params.stride_width, params.dilation_height_factor, - params.dilation_width_factor, height, width, filter_height, filter_width, - padding, &out_height, &out_width); - - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kConvInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* filter = - micro_context->AllocateTempInputTensor(node, kConvWeightsTensor); - TF_LITE_ENSURE(context, filter != nullptr); - TfLiteTensor* bias = - micro_context->AllocateTempInputTensor(node, kConvBiasTensor); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kConvOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - // Note that quantized inference requires that all tensors have their - // parameters set. This is usually done during quantized training. 
- if (data_type != kTfLiteFloat32) { - int output_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - - TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( - context, input, filter, bias, output, params.activation, - &data->output_multiplier, &data->output_shift, - &data->output_activation_min, &data->output_activation_max, - data->per_channel_output_multiplier, data->per_channel_output_shift, - output_channels)); - } - - data->input_zero_point = input->params.zero_point; - data->filter_zero_point = filter->params.zero_point; - data->output_zero_point = output->params.zero_point; - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(filter); - micro_context->DeallocateTempTfLiteTensor(bias); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - TFLITE_DCHECK(node->builtin_data != nullptr); - - OpDataConv* data = static_cast(node->user_data); - const auto& params = - *(static_cast(node->builtin_data)); - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* filter = - micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor); - TF_LITE_ENSURE(context, filter != nullptr); - - const int input_width = input->dims->data[2]; - const int input_height = input->dims->data[1]; - const int filter_width = filter->dims->data[2]; - const int filter_height = filter->dims->data[1]; - const int output_width = output->dims->data[2]; - const int output_height = output->dims->data[1]; - - // Dynamically allocate per-channel quantization 
parameters. - const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - data->per_channel_output_multiplier = - static_cast(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t))); - data->per_channel_output_shift = - static_cast(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t))); - - // All per-channel quantized tensors need valid zero point and scale arrays. - if (input->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, filter->quantization.type, - kTfLiteAffineQuantization); - - const auto* affine_quantization = - static_cast(filter->quantization.params); - TFLITE_DCHECK(affine_quantization != nullptr); - TFLITE_DCHECK(affine_quantization->scale != nullptr); - TFLITE_DCHECK(affine_quantization->zero_point != nullptr); - - TF_LITE_ENSURE( - context, affine_quantization->scale->size == 1 || - affine_quantization->scale->size == - filter->dims->data[kDepthwiseConvQuantizedDimension]); - - TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, - affine_quantization->zero_point->size); - } - - TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv( - context, node, params, input_width, input_height, filter_width, - filter_height, output_width, output_height, input->type, data)); - - micro_context->DeallocateTempTfLiteTensor(output); - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(filter); - - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize.cc deleted file mode 100644 index 1cf7f133..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/dequantize.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/quantize.h" -#include "tensorflow/lite/kernels/internal/reference/requantize.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/dequantize.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -void* DequantizeInit(TfLiteContext* context, const char* buffer, - size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(DequantizeOpData)); -} - -TfLiteStatus DequantizeEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - DequantizeOpData* data = static_cast(node->user_data); - - const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); - TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - - if (output->type == kTfLiteFloat32) { - switch (input->type) { - case kTfLiteInt8: - reference_ops::Dequantize(data->quantization_params, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - 
tflite::micro::GetTensorData(output)); - break; - case kTfLiteInt16: - reference_ops::Dequantize(data->quantization_params, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - break; - case kTfLiteUInt8: - reference_ops::Dequantize(data->quantization_params, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - break; - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else { - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - - return kTfLiteOk; -} - -TfLiteRegistration Register_DEQUANTIZE() { - return tflite::micro::RegisterOp(DequantizeInit, DequantizePrepare, - DequantizeEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize.h deleted file mode 100644 index fe6ec169..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -struct DequantizeOpData { - tflite::DequantizationParams quantization_params; - // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. - int32_t output_multiplier; - int output_shift; - int32_t output_zero_point; -}; - -TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize_common.cc deleted file mode 100644 index 438f9cda..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/dequantize_common.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/dequantize.h" -#include "tensorflow/lite/kernels/internal/reference/quantize.h" -#include "tensorflow/lite/kernels/internal/reference/requantize.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/dequantize.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { - -TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - DequantizeOpData* data = static_cast(node->user_data); - - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - MicroContext* micro_context = GetMicroContext(context); - - // TODO(b/140515557): Add cached dequant to improve hybrid model performance. 
- TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE(context, input->type == kTfLiteInt8 || - input->type == kTfLiteInt16 || - input->type == kTfLiteUInt8); - TF_LITE_ENSURE(context, output->type == kTfLiteFloat32); - - if (output->type == kTfLiteInt32) { - const double effective_output_scale = - static_cast(input->params.scale) / - static_cast(output->params.scale); - QuantizeMultiplier(effective_output_scale, &data->output_multiplier, - &data->output_shift); - } - - data->quantization_params.zero_point = input->params.zero_point; - data->quantization_params.scale = static_cast(input->params.scale); - data->output_zero_point = output->params.zero_point; - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/detection_postprocess.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/detection_postprocess.cc deleted file mode 100644 index 326d87b5..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/detection_postprocess.cc +++ /dev/null @@ -1,807 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include -#include -#include - -#include "flatbuffers/flexbuffers.h" -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { -namespace { - -/** - * This version of detection_postprocess is specific to TFLite Micro. It - * contains the following differences between the TFLite version: - * - * 1.) Temporaries (temporary tensors) - Micro use instead scratch buffer API. - * 2.) Output dimensions - the TFLite version does not support undefined out - * dimensions. So model must have static out dimensions. - */ - -// Input tensors -constexpr int kInputTensorBoxEncodings = 0; -constexpr int kInputTensorClassPredictions = 1; -constexpr int kInputTensorAnchors = 2; - -// Output tensors -constexpr int kOutputTensorDetectionBoxes = 0; -constexpr int kOutputTensorDetectionClasses = 1; -constexpr int kOutputTensorDetectionScores = 2; -constexpr int kOutputTensorNumDetections = 3; - -constexpr int kNumCoordBox = 4; -constexpr int kBatchSize = 1; - -constexpr int kNumDetectionsPerClass = 100; - -// Object Detection model produces axis-aligned boxes in two formats: -// BoxCorner represents the lower left corner (xmin, ymin) and -// the upper right corner (xmax, ymax). -// CenterSize represents the center (xcenter, ycenter), height and width. 
-// BoxCornerEncoding and CenterSizeEncoding are related as follows: -// ycenter = y / y_scale * anchor.h + anchor.y; -// xcenter = x / x_scale * anchor.w + anchor.x; -// half_h = 0.5*exp(h/ h_scale)) * anchor.h; -// half_w = 0.5*exp(w / w_scale)) * anchor.w; -// ymin = ycenter - half_h -// ymax = ycenter + half_h -// xmin = xcenter - half_w -// xmax = xcenter + half_w -struct BoxCornerEncoding { - float ymin; - float xmin; - float ymax; - float xmax; -}; - -struct CenterSizeEncoding { - float y; - float x; - float h; - float w; -}; -// We make sure that the memory allocations are contiguous with static_assert. -static_assert(sizeof(BoxCornerEncoding) == sizeof(float) * kNumCoordBox, - "Size of BoxCornerEncoding is 4 float values"); -static_assert(sizeof(CenterSizeEncoding) == sizeof(float) * kNumCoordBox, - "Size of CenterSizeEncoding is 4 float values"); - -struct OpData { - int max_detections; - int max_classes_per_detection; // Fast Non-Max-Suppression - int detections_per_class; // Regular Non-Max-Suppression - float non_max_suppression_score_threshold; - float intersection_over_union_threshold; - int num_classes; - bool use_regular_non_max_suppression; - CenterSizeEncoding scale_values; - - // Scratch buffers indexes - int active_candidate_idx; - int decoded_boxes_idx; - int scores_idx; - int score_buffer_idx; - int keep_scores_idx; - int scores_after_regular_non_max_suppression_idx; - int sorted_values_idx; - int keep_indices_idx; - int sorted_indices_idx; - int buffer_idx; - int selected_idx; - - // Cached tensor scale and zero point values for quantized operations - TfLiteQuantizationParams input_box_encodings; - TfLiteQuantizationParams input_class_predictions; - TfLiteQuantizationParams input_anchors; -}; - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - OpData* op_data = nullptr; - - const uint8_t* buffer_t = reinterpret_cast(buffer); - const flexbuffers::Map& m = 
flexbuffers::GetRoot(buffer_t, length).AsMap(); - op_data = reinterpret_cast( - context->AllocatePersistentBuffer(context, sizeof(OpData))); - - op_data->max_detections = m["max_detections"].AsInt32(); - op_data->max_classes_per_detection = m["max_classes_per_detection"].AsInt32(); - if (m["detections_per_class"].IsNull()) - op_data->detections_per_class = kNumDetectionsPerClass; - else - op_data->detections_per_class = m["detections_per_class"].AsInt32(); - if (m["use_regular_nms"].IsNull()) - op_data->use_regular_non_max_suppression = false; - else - op_data->use_regular_non_max_suppression = m["use_regular_nms"].AsBool(); - - op_data->non_max_suppression_score_threshold = - m["nms_score_threshold"].AsFloat(); - op_data->intersection_over_union_threshold = m["nms_iou_threshold"].AsFloat(); - op_data->num_classes = m["num_classes"].AsInt32(); - op_data->scale_values.y = m["y_scale"].AsFloat(); - op_data->scale_values.x = m["x_scale"].AsFloat(); - op_data->scale_values.h = m["h_scale"].AsFloat(); - op_data->scale_values.w = m["w_scale"].AsFloat(); - - return op_data; -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - auto* op_data = static_cast(node->user_data); - - MicroContext* micro_context = GetMicroContext(context); - - // Inputs: box_encodings, scores, anchors - TF_LITE_ENSURE_EQ(context, NumInputs(node), 3); - TfLiteTensor* input_box_encodings = - micro_context->AllocateTempInputTensor(node, kInputTensorBoxEncodings); - TfLiteTensor* input_class_predictions = - micro_context->AllocateTempInputTensor(node, - kInputTensorClassPredictions); - TfLiteTensor* input_anchors = - micro_context->AllocateTempInputTensor(node, kInputTensorAnchors); - TF_LITE_ENSURE_EQ(context, NumDimensions(input_box_encodings), 3); - TF_LITE_ENSURE_EQ(context, NumDimensions(input_class_predictions), 3); - TF_LITE_ENSURE_EQ(context, NumDimensions(input_anchors), 2); - - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4); - const int num_boxes = 
input_box_encodings->dims->data[1]; - const int num_classes = op_data->num_classes; - - op_data->input_box_encodings.scale = input_box_encodings->params.scale; - op_data->input_box_encodings.zero_point = - input_box_encodings->params.zero_point; - op_data->input_class_predictions.scale = - input_class_predictions->params.scale; - op_data->input_class_predictions.zero_point = - input_class_predictions->params.zero_point; - op_data->input_anchors.scale = input_anchors->params.scale; - op_data->input_anchors.zero_point = input_anchors->params.zero_point; - - // Scratch tensors - context->RequestScratchBufferInArena(context, num_boxes, - &op_data->active_candidate_idx); - context->RequestScratchBufferInArena(context, - num_boxes * kNumCoordBox * sizeof(float), - &op_data->decoded_boxes_idx); - context->RequestScratchBufferInArena( - context, - input_class_predictions->dims->data[1] * - input_class_predictions->dims->data[2] * sizeof(float), - &op_data->scores_idx); - - // Additional buffers - context->RequestScratchBufferInArena(context, num_boxes * sizeof(float), - &op_data->score_buffer_idx); - context->RequestScratchBufferInArena(context, num_boxes * sizeof(float), - &op_data->keep_scores_idx); - context->RequestScratchBufferInArena( - context, op_data->max_detections * num_boxes * sizeof(float), - &op_data->scores_after_regular_non_max_suppression_idx); - context->RequestScratchBufferInArena( - context, op_data->max_detections * num_boxes * sizeof(float), - &op_data->sorted_values_idx); - context->RequestScratchBufferInArena(context, num_boxes * sizeof(int), - &op_data->keep_indices_idx); - context->RequestScratchBufferInArena( - context, op_data->max_detections * num_boxes * sizeof(int), - &op_data->sorted_indices_idx); - int buffer_size = std::max(num_classes, op_data->max_detections); - context->RequestScratchBufferInArena( - context, buffer_size * num_boxes * sizeof(int), &op_data->buffer_idx); - buffer_size = std::min(num_boxes, op_data->max_detections); - 
context->RequestScratchBufferInArena( - context, buffer_size * num_boxes * sizeof(int), &op_data->selected_idx); - - // Outputs: detection_boxes, detection_scores, detection_classes, - // num_detections - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4); - - micro_context->DeallocateTempTfLiteTensor(input_box_encodings); - micro_context->DeallocateTempTfLiteTensor(input_class_predictions); - micro_context->DeallocateTempTfLiteTensor(input_anchors); - - return kTfLiteOk; -} - -class Dequantizer { - public: - Dequantizer(int zero_point, float scale) - : zero_point_(zero_point), scale_(scale) {} - float operator()(uint8_t x) { - return (static_cast(x) - zero_point_) * scale_; - } - - private: - int zero_point_; - float scale_; -}; - -template -T ReInterpretTensor(const TfLiteEvalTensor* tensor) { - const float* tensor_base = tflite::micro::GetTensorData(tensor); - return reinterpret_cast(tensor_base); -} - -template -T ReInterpretTensor(TfLiteEvalTensor* tensor) { - float* tensor_base = tflite::micro::GetTensorData(tensor); - return reinterpret_cast(tensor_base); -} - -TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node, - OpData* op_data) { - // Parse input tensor boxencodings - const TfLiteEvalTensor* input_box_encodings = - tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); - TF_LITE_ENSURE_EQ(context, input_box_encodings->dims->data[0], kBatchSize); - const int num_boxes = input_box_encodings->dims->data[1]; - TF_LITE_ENSURE(context, input_box_encodings->dims->data[2] >= kNumCoordBox); - const TfLiteEvalTensor* input_anchors = - tflite::micro::GetEvalInput(context, node, kInputTensorAnchors); - - // Decode the boxes to get (ymin, xmin, ymax, xmax) based on the anchors - CenterSizeEncoding box_centersize; - CenterSizeEncoding scale_values = op_data->scale_values; - CenterSizeEncoding anchor; - for (int idx = 0; idx < num_boxes; ++idx) { - switch (input_box_encodings->type) { - // Float - case kTfLiteFloat32: { - // Please 
see DequantizeBoxEncodings function for the support detail. - const int box_encoding_idx = idx * input_box_encodings->dims->data[2]; - const float* boxes = &(tflite::micro::GetTensorData( - input_box_encodings)[box_encoding_idx]); - box_centersize = *reinterpret_cast(boxes); - anchor = - ReInterpretTensor(input_anchors)[idx]; - break; - } - default: - // Unsupported type. - return kTfLiteError; - } - - float ycenter = static_cast(static_cast(box_centersize.y) / - static_cast(scale_values.y) * - static_cast(anchor.h) + - static_cast(anchor.y)); - - float xcenter = static_cast(static_cast(box_centersize.x) / - static_cast(scale_values.x) * - static_cast(anchor.w) + - static_cast(anchor.x)); - - float half_h = - static_cast(0.5 * - (std::exp(static_cast(box_centersize.h) / - static_cast(scale_values.h))) * - static_cast(anchor.h)); - float half_w = - static_cast(0.5 * - (std::exp(static_cast(box_centersize.w) / - static_cast(scale_values.w))) * - static_cast(anchor.w)); - - float* decoded_boxes = reinterpret_cast( - context->GetScratchBuffer(context, op_data->decoded_boxes_idx)); - auto& box = reinterpret_cast(decoded_boxes)[idx]; - box.ymin = ycenter - half_h; - box.xmin = xcenter - half_w; - box.ymax = ycenter + half_h; - box.xmax = xcenter + half_w; - } - return kTfLiteOk; -} - -void DecreasingPartialArgSort(const float* values, int num_values, - int num_to_sort, int* indices) { - std::iota(indices, indices + num_values, 0); - std::partial_sort(indices, indices + num_to_sort, indices + num_values, - [&values](const int i, const int j) { - return std::tie(values[i], j) > std::tie(values[j], i); - }); -} - -template -void InsertionSort(int* start, int* end, Compare compare) { - for (int* i = start; i != end; ++i) { - std::rotate(std::upper_bound(start, i, *i, compare), i, i + 1); - } -} - -template -void TopDownMerge(int* values, int* scratch, const int half_num_values, - int num_values, Compare compare) { - int left = 0; - int right = half_num_values; - - for (int i 
= 0; i < num_values; i++) { - if (left >= half_num_values || - (right < num_values && compare(values[right], values[left]))) { - scratch[i] = values[right++]; - } else { - scratch[i] = values[left++]; - } - } - memcpy(values, scratch, num_values * sizeof(int)); -} - -template -void MergeSort(int* values, int* scratch, const int num_values, - Compare compare) { - constexpr int threshold = 20; - - if (num_values < threshold) { - InsertionSort(values, values + num_values, compare); - return; - } - - const int half_num_values = num_values / 2; - - MergeSort(values, scratch, half_num_values, compare); - MergeSort(values + half_num_values, scratch, num_values - half_num_values, - compare); - TopDownMerge(values, scratch, half_num_values, num_values, compare); -} - -void DecreasingArgSort(const float* values, int num_values, int* indices, - int* scratch) { - std::iota(indices, indices + num_values, 0); - - MergeSort(indices, scratch, num_values, [&values](const int i, const int j) { - return values[i] > values[j]; - }); -} - -int SelectDetectionsAboveScoreThreshold(const float* values, int size, - const float threshold, - float* keep_values, int* keep_indices) { - int counter = 0; - for (int i = 0; i < size; i++) { - if (values[i] >= threshold) { - keep_values[counter] = values[i]; - keep_indices[counter] = i; - counter++; - } - } - return counter; -} - -bool ValidateBoxes(const float* decoded_boxes, const int num_boxes) { - for (int i = 0; i < num_boxes; ++i) { - // ymax>=ymin, xmax>=xmin - auto& box = reinterpret_cast(decoded_boxes)[i]; - if (box.ymin >= box.ymax || box.xmin >= box.xmax) { - return false; - } - } - return true; -} - -float ComputeIntersectionOverUnion(const float* decoded_boxes, const int i, - const int j) { - auto& box_i = reinterpret_cast(decoded_boxes)[i]; - auto& box_j = reinterpret_cast(decoded_boxes)[j]; - const float area_i = (box_i.ymax - box_i.ymin) * (box_i.xmax - box_i.xmin); - const float area_j = (box_j.ymax - box_j.ymin) * (box_j.xmax - 
box_j.xmin); - if (area_i <= 0 || area_j <= 0) return 0.0; - const float intersection_ymin = std::max(box_i.ymin, box_j.ymin); - const float intersection_xmin = std::max(box_i.xmin, box_j.xmin); - const float intersection_ymax = std::min(box_i.ymax, box_j.ymax); - const float intersection_xmax = std::min(box_i.xmax, box_j.xmax); - const float intersection_area = - std::max(intersection_ymax - intersection_ymin, 0.0) * - std::max(intersection_xmax - intersection_xmin, 0.0); - return intersection_area / (area_i + area_j - intersection_area); -} - -// NonMaxSuppressionSingleClass() prunes out the box locations with high overlap -// before selecting the highest scoring boxes (max_detections in number) -// It assumes all boxes are good in beginning and sorts based on the scores. -// If lower-scoring box has too much overlap with a higher-scoring box, -// we get rid of the lower-scoring box. -// Complexity is O(N^2) pairwise comparison between boxes -TfLiteStatus NonMaxSuppressionSingleClassHelper( - TfLiteContext* context, TfLiteNode* node, OpData* op_data, - const float* scores, int* selected, int* selected_size, - int max_detections) { - const TfLiteEvalTensor* input_box_encodings = - tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); - const int num_boxes = input_box_encodings->dims->data[1]; - const float non_max_suppression_score_threshold = - op_data->non_max_suppression_score_threshold; - const float intersection_over_union_threshold = - op_data->intersection_over_union_threshold; - // Maximum detections should be positive. - TF_LITE_ENSURE(context, (max_detections >= 0)); - // intersection_over_union_threshold should be positive - // and should be less than 1. 
- TF_LITE_ENSURE(context, (intersection_over_union_threshold > 0.0f) && - (intersection_over_union_threshold <= 1.0f)); - // Validate boxes - float* decoded_boxes = reinterpret_cast( - context->GetScratchBuffer(context, op_data->decoded_boxes_idx)); - - TF_LITE_ENSURE(context, ValidateBoxes(decoded_boxes, num_boxes)); - - // threshold scores - int* keep_indices = reinterpret_cast( - context->GetScratchBuffer(context, op_data->keep_indices_idx)); - float* keep_scores = reinterpret_cast( - context->GetScratchBuffer(context, op_data->keep_scores_idx)); - int num_scores_kept = SelectDetectionsAboveScoreThreshold( - scores, num_boxes, non_max_suppression_score_threshold, keep_scores, - keep_indices); - int* sorted_indices = reinterpret_cast( - context->GetScratchBuffer(context, op_data->sorted_indices_idx)); - - // Reusing keep_indices for scratch buffer and write back its values - // after the sorting is done. - DecreasingArgSort(keep_scores, num_scores_kept, sorted_indices, keep_indices); - int counter = 0; - for (int i = 0; i < num_boxes; i++) { - if (scores[i] >= non_max_suppression_score_threshold) { - keep_indices[counter] = i; - counter++; - } - } - - const int num_boxes_kept = num_scores_kept; - const int output_size = std::min(num_boxes_kept, max_detections); - *selected_size = 0; - - int num_active_candidate = num_boxes_kept; - uint8_t* active_box_candidate = reinterpret_cast( - context->GetScratchBuffer(context, op_data->active_candidate_idx)); - - for (int row = 0; row < num_boxes_kept; row++) { - active_box_candidate[row] = 1; - } - for (int i = 0; i < num_boxes_kept; ++i) { - if (num_active_candidate == 0 || *selected_size >= output_size) break; - if (active_box_candidate[i] == 1) { - selected[(*selected_size)++] = keep_indices[sorted_indices[i]]; - active_box_candidate[i] = 0; - num_active_candidate--; - } else { - continue; - } - for (int j = i + 1; j < num_boxes_kept; ++j) { - if (active_box_candidate[j] == 1) { - float intersection_over_union = 
ComputeIntersectionOverUnion( - decoded_boxes, keep_indices[sorted_indices[i]], - keep_indices[sorted_indices[j]]); - - if (intersection_over_union > intersection_over_union_threshold) { - active_box_candidate[j] = 0; - num_active_candidate--; - } - } - } - } - - return kTfLiteOk; -} - -// This function implements a regular version of Non Maximal Suppression (NMS) -// for multiple classes where -// 1) we do NMS separately for each class across all anchors and -// 2) keep only the highest anchor scores across all classes -// 3) The worst runtime of the regular NMS is O(K*N^2) -// where N is the number of anchors and K the number of -// classes. -TfLiteStatus NonMaxSuppressionMultiClassRegularHelper(TfLiteContext* context, - TfLiteNode* node, - OpData* op_data, - const float* scores) { - const TfLiteEvalTensor* input_box_encodings = - tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); - const TfLiteEvalTensor* input_class_predictions = - tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions); - TfLiteEvalTensor* detection_boxes = - tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes); - TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput( - context, node, kOutputTensorDetectionClasses); - TfLiteEvalTensor* detection_scores = - tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores); - TfLiteEvalTensor* num_detections = - tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections); - - const int num_boxes = input_box_encodings->dims->data[1]; - const int num_classes = op_data->num_classes; - const int num_detections_per_class = op_data->detections_per_class; - const int max_detections = op_data->max_detections; - const int num_classes_with_background = - input_class_predictions->dims->data[2]; - // The row index offset is 1 if background class is included and 0 otherwise. 
- int label_offset = num_classes_with_background - num_classes; - TF_LITE_ENSURE(context, num_detections_per_class > 0); - - // For each class, perform non-max suppression. - float* class_scores = reinterpret_cast( - context->GetScratchBuffer(context, op_data->score_buffer_idx)); - int* box_indices_after_regular_non_max_suppression = reinterpret_cast( - context->GetScratchBuffer(context, op_data->buffer_idx)); - float* scores_after_regular_non_max_suppression = - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scores_after_regular_non_max_suppression_idx)); - - int size_of_sorted_indices = 0; - int* sorted_indices = reinterpret_cast( - context->GetScratchBuffer(context, op_data->sorted_indices_idx)); - float* sorted_values = reinterpret_cast( - context->GetScratchBuffer(context, op_data->sorted_values_idx)); - - for (int col = 0; col < num_classes; col++) { - for (int row = 0; row < num_boxes; row++) { - // Get scores of boxes corresponding to all anchors for single class - class_scores[row] = - *(scores + row * num_classes_with_background + col + label_offset); - } - // Perform non-maximal suppression on single class - int selected_size = 0; - int* selected = reinterpret_cast( - context->GetScratchBuffer(context, op_data->selected_idx)); - TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper( - context, node, op_data, class_scores, selected, &selected_size, - num_detections_per_class)); - // Add selected indices from non-max suppression of boxes in this class - int output_index = size_of_sorted_indices; - for (int i = 0; i < selected_size; i++) { - int selected_index = selected[i]; - - box_indices_after_regular_non_max_suppression[output_index] = - (selected_index * num_classes_with_background + col + label_offset); - scores_after_regular_non_max_suppression[output_index] = - class_scores[selected_index]; - output_index++; - } - // Sort the max scores among the selected indices - // Get the indices for top scores - int num_indices_to_sort = 
std::min(output_index, max_detections); - DecreasingPartialArgSort(scores_after_regular_non_max_suppression, - output_index, num_indices_to_sort, sorted_indices); - - // Copy values to temporary vectors - for (int row = 0; row < num_indices_to_sort; row++) { - int temp = sorted_indices[row]; - sorted_indices[row] = box_indices_after_regular_non_max_suppression[temp]; - sorted_values[row] = scores_after_regular_non_max_suppression[temp]; - } - // Copy scores and indices from temporary vectors - for (int row = 0; row < num_indices_to_sort; row++) { - box_indices_after_regular_non_max_suppression[row] = sorted_indices[row]; - scores_after_regular_non_max_suppression[row] = sorted_values[row]; - } - size_of_sorted_indices = num_indices_to_sort; - } - - // Allocate output tensors - for (int output_box_index = 0; output_box_index < max_detections; - output_box_index++) { - if (output_box_index < size_of_sorted_indices) { - const int anchor_index = floor( - box_indices_after_regular_non_max_suppression[output_box_index] / - num_classes_with_background); - const int class_index = - box_indices_after_regular_non_max_suppression[output_box_index] - - anchor_index * num_classes_with_background - label_offset; - const float selected_score = - scores_after_regular_non_max_suppression[output_box_index]; - // detection_boxes - float* decoded_boxes = reinterpret_cast( - context->GetScratchBuffer(context, op_data->decoded_boxes_idx)); - ReInterpretTensor(detection_boxes)[output_box_index] = - reinterpret_cast(decoded_boxes)[anchor_index]; - // detection_classes - tflite::micro::GetTensorData(detection_classes)[output_box_index] = - class_index; - // detection_scores - tflite::micro::GetTensorData(detection_scores)[output_box_index] = - selected_score; - } else { - ReInterpretTensor( - detection_boxes)[output_box_index] = {0.0f, 0.0f, 0.0f, 0.0f}; - // detection_classes - tflite::micro::GetTensorData(detection_classes)[output_box_index] = - 0.0f; - // detection_scores - 
tflite::micro::GetTensorData(detection_scores)[output_box_index] = - 0.0f; - } - } - tflite::micro::GetTensorData(num_detections)[0] = - size_of_sorted_indices; - - return kTfLiteOk; -} - -// This function implements a fast version of Non Maximal Suppression for -// multiple classes where -// 1) we keep the top-k scores for each anchor and -// 2) during NMS, each anchor only uses the highest class score for sorting. -// 3) Compared to standard NMS, the worst runtime of this version is O(N^2) -// instead of O(KN^2) where N is the number of anchors and K the number of -// classes. -TfLiteStatus NonMaxSuppressionMultiClassFastHelper(TfLiteContext* context, - TfLiteNode* node, - OpData* op_data, - const float* scores) { - const TfLiteEvalTensor* input_box_encodings = - tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); - const TfLiteEvalTensor* input_class_predictions = - tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions); - TfLiteEvalTensor* detection_boxes = - tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes); - - TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput( - context, node, kOutputTensorDetectionClasses); - TfLiteEvalTensor* detection_scores = - tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores); - TfLiteEvalTensor* num_detections = - tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections); - - const int num_boxes = input_box_encodings->dims->data[1]; - const int num_classes = op_data->num_classes; - const int max_categories_per_anchor = op_data->max_classes_per_detection; - const int num_classes_with_background = - input_class_predictions->dims->data[2]; - - // The row index offset is 1 if background class is included and 0 otherwise. 
- int label_offset = num_classes_with_background - num_classes; - TF_LITE_ENSURE(context, (max_categories_per_anchor > 0)); - const int num_categories_per_anchor = - std::min(max_categories_per_anchor, num_classes); - float* max_scores = reinterpret_cast( - context->GetScratchBuffer(context, op_data->score_buffer_idx)); - int* sorted_class_indices = reinterpret_cast( - context->GetScratchBuffer(context, op_data->buffer_idx)); - - for (int row = 0; row < num_boxes; row++) { - const float* box_scores = - scores + row * num_classes_with_background + label_offset; - int* class_indices = sorted_class_indices + row * num_classes; - DecreasingPartialArgSort(box_scores, num_classes, num_categories_per_anchor, - class_indices); - max_scores[row] = box_scores[class_indices[0]]; - } - - // Perform non-maximal suppression on max scores - int selected_size = 0; - int* selected = reinterpret_cast( - context->GetScratchBuffer(context, op_data->selected_idx)); - TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper( - context, node, op_data, max_scores, selected, &selected_size, - op_data->max_detections)); - - // Allocate output tensors - int output_box_index = 0; - - for (int i = 0; i < selected_size; i++) { - int selected_index = selected[i]; - - const float* box_scores = - scores + selected_index * num_classes_with_background + label_offset; - const int* class_indices = - sorted_class_indices + selected_index * num_classes; - - for (int col = 0; col < num_categories_per_anchor; ++col) { - int box_offset = num_categories_per_anchor * output_box_index + col; - - // detection_boxes - float* decoded_boxes = reinterpret_cast( - context->GetScratchBuffer(context, op_data->decoded_boxes_idx)); - ReInterpretTensor(detection_boxes)[box_offset] = - reinterpret_cast(decoded_boxes)[selected_index]; - - // detection_classes - tflite::micro::GetTensorData(detection_classes)[box_offset] = - class_indices[col]; - - // detection_scores - 
tflite::micro::GetTensorData(detection_scores)[box_offset] = - box_scores[class_indices[col]]; - - output_box_index++; - } - } - - tflite::micro::GetTensorData(num_detections)[0] = output_box_index; - return kTfLiteOk; -} - -TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context, - TfLiteNode* node, OpData* op_data) { - // Get the input tensors - const TfLiteEvalTensor* input_box_encodings = - tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings); - const TfLiteEvalTensor* input_class_predictions = - tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions); - const int num_boxes = input_box_encodings->dims->data[1]; - const int num_classes = op_data->num_classes; - - TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[0], - kBatchSize); - TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[1], num_boxes); - const int num_classes_with_background = - input_class_predictions->dims->data[2]; - - TF_LITE_ENSURE(context, (num_classes_with_background - num_classes <= 1)); - TF_LITE_ENSURE(context, (num_classes_with_background >= num_classes)); - - const float* scores; - switch (input_class_predictions->type) { - case kTfLiteFloat32: - scores = tflite::micro::GetTensorData(input_class_predictions); - break; - default: - // Unsupported type. - return kTfLiteError; - } - - if (op_data->use_regular_non_max_suppression) { - TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClassRegularHelper( - context, node, op_data, scores)); - } else { - TF_LITE_ENSURE_STATUS( - NonMaxSuppressionMultiClassFastHelper(context, node, op_data, scores)); - } - - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - TF_LITE_ENSURE(context, (kBatchSize == 1)); - auto* op_data = static_cast(node->user_data); - - // These two functions correspond to two blocks in the Object Detection model. 
- // In future, we would like to break the custom op in two blocks, which is - // currently not feasible because we would like to input quantized inputs - // and do all calculations in float. Mixed quantized/float calculations are - // currently not supported in TFLite. - - // This fills in temporary decoded_boxes - // by transforming input_box_encodings and input_anchors from - // CenterSizeEncodings to BoxCornerEncoding - TF_LITE_ENSURE_STATUS(DecodeCenterSizeBoxes(context, node, op_data)); - - // This fills in the output tensors - // by choosing effective set of decoded boxes - // based on Non Maximal Suppression, i.e. selecting - // highest scoring non-overlapping boxes. - TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClass(context, node, op_data)); - - return kTfLiteOk; -} -} // namespace - -TfLiteRegistration* Register_DETECTION_POSTPROCESS() { - static TfLiteRegistration r = tflite::micro::RegisterOp(Init, Prepare, Eval); - return &r; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h deleted file mode 100644 index f5b9eae0..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H -#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H - -extern const int g_gen_data_size_none_regular_nms; -extern const unsigned char g_gen_data_none_regular_nms[]; - -extern const int g_gen_data_size_regular_nms; -extern const unsigned char g_gen_data_regular_nms[]; - -#endif diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/README.md b/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/README.md deleted file mode 100644 index b0c215fb..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# Info - -These are the Espressif chipset specific replacement kernels. -The kernels call optimized routines or reference routines depending upon optimization option selected. - -By default optimizations are selected if available. -To change this behaviour, please make the appropriate `ESP-NN` menu selection after running: - -``` -idf.py menuconfig -``` diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/add.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/add.cc deleted file mode 100644 index 2f1ac58d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/add.cc +++ /dev/null @@ -1,202 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/add.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/add.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -#include - -#if ESP_NN -#include -#endif - -long long add_total_time = 0; - -namespace tflite { - -void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, - const OpDataAdd* data, const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params; - SetActivationParams(data->output_activation_min_f32, - data->output_activation_max_f32, &op_params); - if (data->requires_broadcast) { - reference_ops::BroadcastAdd4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } 
else { - reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } -} - -TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteAddParams* params, const OpDataAdd* data, - const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, - TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params; - op_params.left_shift = data->left_shift; - op_params.input1_offset = data->input1_offset; - op_params.input1_multiplier = data->input1_multiplier; - op_params.input1_shift = data->input1_shift; - op_params.input2_offset = data->input2_offset; - op_params.input2_multiplier = data->input2_multiplier; - op_params.input2_shift = data->input2_shift; - op_params.output_offset = data->output_offset; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; - SetActivationParams(data->output_activation_min, data->output_activation_max, - &op_params); - bool need_broadcast = reference_ops::ProcessBroadcastShapes( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorShape(input2), &op_params); - - switch (output->type) { - case kTfLiteInt8: { - if (need_broadcast) { - reference_integer_ops::BroadcastAdd4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { -#if ESP_NN - const int8_t *input1_data = tflite::micro::GetTensorData(input1); - const int8_t *input2_data = tflite::micro::GetTensorData(input2); - int8_t *out_data = tflite::micro::GetTensorData(output); - - esp_nn_add_elementwise_s8(input1_data, - input2_data, - data->input1_offset, - data->input2_offset, 
- data->input1_multiplier, - data->input2_multiplier, - data->input1_shift, - data->input2_shift, - data->left_shift, - out_data, - data->output_offset, - data->output_multiplier, - data->output_shift, - data->output_activation_min, - data->output_activation_max, - MatchingElementsSize(tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorShape(output)) - ); -#else - reference_integer_ops::Add( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -#endif - } - break; - } - case kTfLiteInt16: { - if (need_broadcast) { - reference_ops::BroadcastAdd4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output), - false); - } - break; - } - default: - MicroPrintf("Type %s (%d) not supported.", - TfLiteTypeGetName(output->type), output->type); - return kTfLiteError; - } - - return kTfLiteOk; -} - -void* AddInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd)); -} - -TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->builtin_data); - - TFLITE_DCHECK(node->user_data != nullptr); - const OpDataAdd* data = static_cast(node->user_data); - - const 
TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, kAddInputTensor1); - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, kAddInputTensor2); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kAddOutputTensor); - - long long start_time = esp_timer_get_time(); - - if (output->type == kTfLiteFloat32) { - EvalAdd(context, node, params, data, input1, input2, output); - } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { - TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data, - input1, input2, output)); - } else { - MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type), - output->type); - return kTfLiteError; - } - add_total_time += esp_timer_get_time() - start_time; - - return kTfLiteOk; -} - -TfLiteRegistration Register_ADD() { - return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/fully_connected.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/fully_connected.cc deleted file mode 100644 index 484cffb6..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/fully_connected.cc +++ /dev/null @@ -1,191 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/kernels/fully_connected.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -#if ESP_NN -#include -#endif - -#include - -long long fc_total_time = 0; - -namespace tflite { -namespace { - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, - sizeof(OpDataFullyConnected)); -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TFLITE_DCHECK(node->user_data != nullptr); - TFLITE_DCHECK(node->builtin_data != nullptr); - - auto* data = static_cast(node->user_data); - const auto params = - static_cast(node->builtin_data); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* filter = micro_context->AllocateTempInputTensor( - node, kFullyConnectedWeightsTensor); - TF_LITE_ENSURE(context, filter != nullptr); - TfLiteTensor* bias = - micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor); - TfLiteTensor* output = micro_context->AllocateTempOutputTensor( - node, kFullyConnectedOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); - TF_LITE_ENSURE_MSG(context, input->type == 
filter->type, - "Hybrid models are not supported on TFLite Micro."); - - TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected( - context, params->activation, input->type, - input, filter, bias, output, data)); - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(filter); - if (bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(bias); - } - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->builtin_data != nullptr); - const auto* params = - static_cast(node->builtin_data); - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor); - const TfLiteEvalTensor* filter = - tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor); - const TfLiteEvalTensor* bias = - tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor); - - TFLITE_DCHECK(node->user_data != nullptr); - const auto& data = - *(static_cast(node->user_data)); - - long long start_time = esp_timer_get_time(); - // Checks in Prepare ensure input, output and filter types are all the same. - switch (input->type) { - case kTfLiteFloat32: { - tflite::reference_ops::FullyConnected( - FullyConnectedParamsFloat(params->activation), - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - tflite::micro::GetTensorShape(bias), - tflite::micro::GetTensorData(bias), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - break; - } - - case kTfLiteInt8: { - const int32_t* bias_data = - nullptr != bias ? 
tflite::micro::GetTensorData(bias) - : nullptr; -#if ESP_NN - const RuntimeShape& filter_shape = tflite::micro::GetTensorShape(filter); - const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output); - const int filter_dim_count = filter_shape.DimensionsCount(); - const int batches = output_shape.Dims(0); - const int output_depth = output_shape.Dims(1); - TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); - const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - - const int8_t *input_data = tflite::micro::GetTensorData(input); - int8_t *output_data = tflite::micro::GetTensorData(output); - const int8_t *filter_data = tflite::micro::GetTensorData(filter); - - for (int b = 0; b < batches; ++b) { - esp_nn_fully_connected_s8(input_data, -data.input_zero_point, - accum_depth, - filter_data, -data.filter_zero_point, - bias_data, output_data, output_depth, - data.output_zero_point, - data.output_shift, data.output_multiplier, - data.output_activation_min, - data.output_activation_max); - input_data += accum_depth; - output_data += output_depth; - } -#else - tflite::reference_integer_ops::FullyConnected( - FullyConnectedParamsQuantized(data), - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - tflite::micro::GetTensorShape(bias), bias_data, - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -#endif - break; - } - - case kTfLiteUInt8: { - tflite::reference_ops::FullyConnected( - FullyConnectedParamsQuantized(data), - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), - tflite::micro::GetTensorShape(bias), - tflite::micro::GetTensorData(bias), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - break; - } - default: { - TF_LITE_KERNEL_LOG(context, "Type %s 
(%d) not supported.", - TfLiteTypeGetName(input->type), input->type); - return kTfLiteError; - } - } - fc_total_time += esp_timer_get_time() - start_time; - return kTfLiteOk; -} - -} // namespace - -TfLiteRegistration Register_FULLY_CONNECTED() { - return tflite::micro::RegisterOp(Init, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/mul.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/mul.cc deleted file mode 100644 index 02413f5c..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/mul.cc +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/kernels/mul.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" -#include "tensorflow/lite/kernels/internal/reference/mul.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -#if ESP_NN -#include -#endif - -#include - -long long mul_total_time = 0; - -namespace tflite { -#if ESP_NN -void MulEvalQuantized(TfLiteContext* context, TfLiteNode* node, - const OpDataMul* data, const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, - TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params = {}; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; - op_params.float_activation_max = data->output_activation_max_f32; - op_params.input1_offset = -data->input1_zero_point; - op_params.input2_offset = -data->input2_zero_point; - op_params.output_offset = data->output_zero_point; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; - - bool need_broadcast = reference_ops::ProcessBroadcastShapes( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorShape(input2), &op_params); - - if (need_broadcast) { - reference_integer_ops::BroadcastMul4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - 
tflite::micro::GetTensorData(output)); - } else { - const int8_t *input1_data = tflite::micro::GetTensorData(input1); - const int8_t *input2_data = tflite::micro::GetTensorData(input2); - int8_t *out_data = tflite::micro::GetTensorData(output); - - esp_nn_mul_elementwise_s8(input1_data, input2_data, op_params.input1_offset, - op_params.input2_offset, out_data, op_params.output_offset, - op_params.output_multiplier, op_params.output_shift, - op_params.quantized_activation_min, op_params.quantized_activation_max, - MatchingElementsSize(tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorShape(output))); - } -} -#endif - -TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->builtin_data != nullptr); - auto* params = reinterpret_cast(node->builtin_data); - - TFLITE_DCHECK(node->user_data != nullptr); - const OpDataMul* data = static_cast(node->user_data); - - const TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, kMulInput1Tensor); - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, kMulInput2Tensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kMulOutputTensor); - - long long start_time = esp_timer_get_time(); - switch (input1->type) { - case kTfLiteInt8: -#if ESP_NN - MulEvalQuantized(context, node, data, input1, input2, output); -#else - EvalMulQuantizedReference(context, node, data, input1, input2, output); -#endif - break; - case kTfLiteInt32: - EvalMulQuantizedReference(context, node, data, input1, input2, output); - break; - case kTfLiteFloat32: - EvalMulFloatReference(context, node, params, data, input1, input2, - output); - break; - default: - MicroPrintf("Type %s (%d) not supported.", - TfLiteTypeGetName(input1->type), input1->type); - return kTfLiteError; - } - mul_total_time += esp_timer_get_time() - start_time; - return kTfLiteOk; -} - -TfLiteRegistration Register_MUL() { - return 
tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/pooling.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/pooling.cc deleted file mode 100644 index b450929e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/pooling.cc +++ /dev/null @@ -1,231 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/kernels/internal/reference/pooling.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/pooling.h" - -#if ESP_NN -#include -#endif - -#include - -long long pooling_total_time = 0; - -namespace tflite { - -namespace { -#if ESP_NN -void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - - const int stride_height = params->stride_height; - const int stride_width = params->stride_width; - const int filter_height = params->filter_height; - const int filter_width = params->filter_width; - const int activation_min = data->activation_min; - const int activation_max = data->activation_max; - const int pad_height = data->padding.height; - const int pad_width = data->padding.width; - - const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input); - const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output); - TFLITE_DCHECK_LE(activation_min, activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - - const int8_t *input_data = tflite::micro::GetTensorData(input); - int8_t *output_data = tflite::micro::GetTensorData(output); - - const int input_size = input_width * input_height * depth; - const int output_size = output_width * output_height * depth; - - if (depth % 4 == 
0) { // S3 version only supports channels multiple of 4 - for (int batch = 0; batch < batches; ++batch) { - esp_nn_avg_pool_s8(input_data, input_width, input_height, - output_data, output_width, output_height, - stride_width, stride_height, - filter_width, filter_height, - pad_width, pad_height, - activation_min, activation_max, depth); - input_data += input_size; - output_data += output_size; - } - } else { - for (int batch = 0; batch < batches; ++batch) { - esp_nn_avg_pool_s8_ansi(input_data, input_width, input_height, - output_data, output_width, output_height, - stride_width, stride_height, - filter_width, filter_height, - pad_width, pad_height, - activation_min, activation_max, depth); - input_data += input_size; - output_data += output_size; - } - } -} - -void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, const OpDataPooling* data, - const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { - - const int stride_height = params->stride_height; - const int stride_width = params->stride_width; - const int filter_height = params->filter_height; - const int filter_width = params->filter_width; - const int activation_min = data->activation_min; - const int activation_max = data->activation_max; - const int pad_height = data->padding.height; - const int pad_width = data->padding.width; - - const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input); - const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output); - TFLITE_DCHECK_LE(activation_min, activation_max); - TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); - const int batches = MatchingDim(input_shape, 0, output_shape, 0); - const int depth = MatchingDim(input_shape, 3, output_shape, 3); - const int input_height = input_shape.Dims(1); - const int input_width = input_shape.Dims(2); - const int output_height = output_shape.Dims(1); - const int output_width = output_shape.Dims(2); - - const 
int8_t *input_data = tflite::micro::GetTensorData(input); - int8_t *output_data = tflite::micro::GetTensorData(output); - - const int input_size = input_width * input_height * depth; - const int output_size = output_width * output_height * depth; - if (depth % 4 == 0) { // S3 version only supports channels multiple of 4 - for (int batch = 0; batch < batches; ++batch) { - esp_nn_max_pool_s8(input_data, input_width, input_height, - output_data, output_width, output_height, - stride_width, stride_height, - filter_width, filter_height, - pad_width, pad_height, - activation_min, activation_max, depth); - input_data += input_size; - output_data += output_size; - } - } else { - for (int batch = 0; batch < batches; ++batch) { - esp_nn_max_pool_s8_ansi(input_data, input_width, input_height, - output_data, output_width, output_height, - stride_width, stride_height, - filter_width, filter_height, - pad_width, pad_height, - activation_min, activation_max, depth); - input_data += input_size; - output_data += output_size; - } - } -} -#endif - -TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->builtin_data != nullptr); - auto* params = reinterpret_cast(node->builtin_data); - - TFLITE_DCHECK(node->user_data != nullptr); - const OpDataPooling* data = - static_cast(node->user_data); - - const TfLiteEvalTensor* input = - micro::GetEvalInput(context, node, kPoolingInputTensor); - TfLiteEvalTensor* output = - micro::GetEvalOutput(context, node, kPoolingOutputTensor); - - long long start_time = esp_timer_get_time(); - // Inputs and outputs share the same type, guaranteed by the converter. 
- switch (input->type) { - case kTfLiteFloat32: - AveragePoolingEvalFloat(context, node, params, data, input, output); - break; - case kTfLiteInt8: -#if ESP_NN - AverageEvalQuantized(context, node, params, data, input, output); -#else - AveragePoolingEvalQuantized(context, node, params, data, input, output); -#endif - break; - default: - TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported", - TfLiteTypeGetName(input->type)); - return kTfLiteError; - } - pooling_total_time += esp_timer_get_time() - start_time; - return kTfLiteOk; -} - -TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->builtin_data != nullptr); - auto* params = reinterpret_cast(node->builtin_data); - - TFLITE_DCHECK(node->user_data != nullptr); - const OpDataPooling* data = - static_cast(node->user_data); - - const TfLiteEvalTensor* input = - micro::GetEvalInput(context, node, kPoolingInputTensor); - TfLiteEvalTensor* output = - micro::GetEvalOutput(context, node, kPoolingOutputTensor); - - long long start_time = esp_timer_get_time(); - switch (input->type) { - case kTfLiteFloat32: - MaxPoolingEvalFloat(context, node, params, data, input, output); - break; - case kTfLiteInt8: -#if ESP_NN - MaxEvalQuantized(context, node, params, data, input, output); -#else - MaxPoolingEvalQuantized(context, node, params, data, input, output); -#endif - break; - default: - TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", - TfLiteTypeGetName(input->type)); - return kTfLiteError; - } - pooling_total_time += esp_timer_get_time() - start_time; - return kTfLiteOk; -} - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling)); -} - -} // namespace - -TfLiteRegistration Register_AVERAGE_POOL_2D() { - return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval); -} - -TfLiteRegistration 
Register_MAX_POOL_2D() { - return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/ethosu.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/ethosu.cc deleted file mode 100644 index c305121e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/ethosu.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// -// This is a stub file for non-Ethos platforms -// -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -TfLiteRegistration* Register_ETHOSU() { return nullptr; } - -const char* GetString_ETHOSU() { return ""; } - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/ethosu.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/ethosu.h deleted file mode 100644 index cfbb0d3f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/ethosu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_ - -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -TfLiteRegistration* Register_ETHOSU(); - -const char* GetString_ETHOSU(); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/floor.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/floor.cc deleted file mode 100644 index 6b2a4cc2..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/floor.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/floor.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { -namespace ops { -namespace micro { -namespace floor { - -constexpr int kInputTensor = 0; -constexpr int kOutputTensor = 0; - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); - TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); - reference_ops::Floor(tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - return kTfLiteOk; -} -} // namespace floor - -TfLiteRegistration Register_FLOOR() { - return tflite::micro::RegisterOp(nullptr, nullptr, floor::Eval); -} - -} // namespace micro -} // namespace ops -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/fully_connected.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/fully_connected.h deleted file mode 100644 index 93026cd5..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/fully_connected.h +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -struct OpDataFullyConnected { - // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. - int32_t output_multiplier; - int output_shift; - // The range of the fused activation layer. For example for kNone and - // uint8_t these would be 0 and 255. - int32_t output_activation_min; - int32_t output_activation_max; - // The index of the temporary tensor where the quantized inputs are cached. - int input_quantized_index; - // Cached zero point values of tensors. - int32_t input_zero_point; - int32_t filter_zero_point; - int32_t output_zero_point; -}; - -extern const int kFullyConnectedInputTensor; -extern const int kFullyConnectedWeightsTensor; -extern const int kFullyConnectedBiasTensor; -extern const int kFullyConnectedOutputTensor; - -// Returns a FullyConnectedParams struct with all the parameters needed for a -// float computation. -FullyConnectedParams FullyConnectedParamsFloat( - TfLiteFusedActivation activation); - -// Returns a FullyConnectedParams struct with all the parameters needed for a -// quantized computation. 
-FullyConnectedParams FullyConnectedParamsQuantized( - const OpDataFullyConnected& op_data); - -TfLiteStatus CalculateOpDataFullyConnected( - TfLiteContext* context, TfLiteFusedActivation activation, - TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output, OpDataFullyConnected* data); - -// This is the most generic TfLiteRegistration. The actual supported types may -// still be target dependent. The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_FULLY_CONNECTED(); - -#if defined(CMSIS_NN) || defined(HEXAGON) -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int8. -TfLiteRegistration Register_FULLY_CONNECTED_INT8(); - -#else -// Note that while this block gets used for both reference and optimized kernels -// that do not have any specialized implementations, the only goal here is to -// define fallback implementation that allow reference kernels to still be used -// from applications that call a more specific kernel variant. - -inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() { - return Register_FULLY_CONNECTED(); -} - -#endif - -#if defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int16. -TfLiteRegistration Register_FULLY_CONNECTED_INT16(); - -#else -// Note that while this block gets used for both reference and optimized kernels -// that do not have any specialized implementations, the only goal here is to -// define fallback implementation that allow reference kernels to still be used -// from applications that call a more specific kernel variant. 
- -inline TfLiteRegistration Register_FULLY_CONNECTED_INT16() { - return Register_FULLY_CONNECTED(); -} - -#endif - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/fully_connected_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/fully_connected_common.cc deleted file mode 100644 index e7d0056c..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/fully_connected_common.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/fully_connected.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/fully_connected.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { - -const int kFullyConnectedInputTensor = 0; -const int kFullyConnectedWeightsTensor = 1; -const int kFullyConnectedBiasTensor = 2; -const int kFullyConnectedOutputTensor = 0; - -FullyConnectedParams FullyConnectedParamsQuantized( - const OpDataFullyConnected& op_data) { - FullyConnectedParams op_params; - op_params.input_offset = -op_data.input_zero_point; - op_params.weights_offset = -op_data.filter_zero_point; - op_params.output_offset = op_data.output_zero_point; - op_params.output_multiplier = op_data.output_multiplier; - op_params.output_shift = op_data.output_shift; - op_params.quantized_activation_min = op_data.output_activation_min; - op_params.quantized_activation_max = op_data.output_activation_max; - return op_params; -} - -FullyConnectedParams FullyConnectedParamsFloat( - TfLiteFusedActivation activation) { - FullyConnectedParams op_params; - CalculateActivationRange(activation, &op_params.float_activation_min, - &op_params.float_activation_max); - return op_params; -} - -TfLiteStatus CalculateOpDataFullyConnected( - TfLiteContext* context, TfLiteFusedActivation activation, - TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output, - OpDataFullyConnected* data) { - if (data_type != kTfLiteFloat32) { 
- double real_multiplier = 0.0; - TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( - context, input, filter, bias, output, &real_multiplier)); - QuantizeMultiplier(real_multiplier, &data->output_multiplier, - &data->output_shift); - - data->input_zero_point = input->params.zero_point; - // Filter weights will always be symmetric quantized since we only support - // int8 quantization. See - // https://github.com/tensorflow/tensorflow/issues/44912 for additional - // context. - TFLITE_DCHECK(filter->params.zero_point == 0); - data->filter_zero_point = filter->params.zero_point; - data->output_zero_point = output->params.zero_point; - - return CalculateActivationRangeQuantized(context, activation, output, - &data->output_activation_min, - &data->output_activation_max); - } - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish.cc deleted file mode 100644 index 055e12e6..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/hard_swish.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/hard_swish.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { -namespace { -void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams)); -} - -TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kHardSwishInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kHardSwishOutputTensor); - HardSwishParams* params = static_cast(node->user_data); - - switch (input->type) { - case kTfLiteFloat32: { - tflite::reference_ops::HardSwish( - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } break; - case kTfLiteInt8: { - tflite::reference_ops::HardSwish( - *params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } break; - default: { - MicroPrintf("Unsupported type %s", TfLiteTypeGetName(input->type)); - return kTfLiteError; - } - } - 
return kTfLiteOk; -} - -} // namespace - -TfLiteRegistration Register_HARD_SWISH() { - return tflite::micro::RegisterOp(HardSwishInit, tflite::HardSwishPrepare, - HardSwishEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish.h deleted file mode 100644 index 3ffe60dc..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish.h +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -extern const int kHardSwishInputTensor; -extern const int kHardSwishOutputTensor; - -TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node); -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish_common.cc deleted file mode 100644 index 8f846522..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/hard_swish_common.cc +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/hard_swish.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/hard_swish.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { - -const int kHardSwishInputTensor = 0; -const int kHardSwishOutputTensor = 0; - -TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TFLITE_DCHECK(node->user_data != nullptr); - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kHardSwishInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kHardSwishOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - if (input->type == kTfLiteInt8) { - HardSwishParams* params = static_cast(node->user_data); - - params->input_zero_point = input->params.zero_point; - params->output_zero_point = output->params.zero_point; - - const float input_scale = input->params.scale; - const float hires_input_scale = (1.0f / 128.0f) * input_scale; - const float reluish_scale = 3.0f / 32768.0f; - const float output_scale = output->params.scale; - - const double output_multiplier = - static_cast(hires_input_scale / output_scale); - int32_t output_multiplier_fixedpoint_int32; - QuantizeMultiplier(output_multiplier, 
&output_multiplier_fixedpoint_int32, - ¶ms->output_multiplier_exponent); - DownScaleInt32ToInt16Multiplier( - output_multiplier_fixedpoint_int32, - ¶ms->output_multiplier_fixedpoint_int16); - - TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0); - - const double reluish_multiplier = - static_cast(hires_input_scale / reluish_scale); - int32_t reluish_multiplier_fixedpoint_int32; - QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32, - ¶ms->reluish_multiplier_exponent); - DownScaleInt32ToInt16Multiplier( - reluish_multiplier_fixedpoint_int32, - ¶ms->reluish_multiplier_fixedpoint_int16); - } - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/if.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/if.cc deleted file mode 100644 index 39eca8b4..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/if.cc +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_context.h" -#include "tensorflow/lite/micro/micro_graph.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -namespace { - -struct OpData { - int then_subgraph_index; - int else_subgraph_index; -}; - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpData)); -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - OpData* op_data = reinterpret_cast(node->user_data); - const auto* params = - reinterpret_cast(node->builtin_data); - op_data->then_subgraph_index = params->then_subgraph_index; - op_data->else_subgraph_index = params->else_subgraph_index; - - TF_LITE_ENSURE(context, node->inputs->size > 0); - - // The first input is the condition. - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0); - - TF_LITE_ENSURE(context, cond != nullptr); - TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool); - TF_LITE_ENSURE_EQ(context, NumElements(cond), 1); - - micro_context->DeallocateTempTfLiteTensor(cond); - - // The first input of the node is the condition. The rest of inputs are - // passed to the branch subgraphs. Therefore, the number of subgraph inputs - // will be the number of node inputs - 1. 
- size_t num_inputs = node->inputs->size - 1; - size_t num_outputs = node->outputs->size; - - MicroGraph& graph_info = micro_context->graph(); - - TF_LITE_ENSURE(context, - op_data->then_subgraph_index < graph_info.NumSubgraphs()); - TF_LITE_ENSURE(context, - op_data->else_subgraph_index < graph_info.NumSubgraphs()); - - TF_LITE_ENSURE_EQ(context, num_inputs, - graph_info.NumSubgraphInputs(op_data->then_subgraph_index)); - TF_LITE_ENSURE_EQ( - context, num_outputs, - graph_info.NumSubgraphOutputs(op_data->then_subgraph_index)); - - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const OpData* op_data = reinterpret_cast(node->user_data); - - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0); - - TF_LITE_ENSURE(context, cond != nullptr); - bool cond_value = cond->data.b[0]; - micro_context->DeallocateTempTfLiteTensor(cond); - - MicroGraph* graph_info = µ_context->graph(); - // Currently we copy the input / output between the subgraphs. - int active_branch_subgraph_index = - cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index; - - TF_LITE_ENSURE_OK(context, - tflite::micro::CopyOpInputsToSubgraphInputs( - context, node, graph_info, active_branch_subgraph_index, - /*first_tensor_idx=*/1)); - - TF_LITE_ENSURE_OK(context, - graph_info->InvokeSubgraph(active_branch_subgraph_index)); - - TF_LITE_ENSURE_OK( - context, tflite::micro::CopySubgraphOutputsToOpOutputs( - context, node, graph_info, active_branch_subgraph_index)); - - return kTfLiteOk; -} - -} // namespace. 
- -TfLiteRegistration Register_IF() { - return tflite::micro::RegisterOp(Init, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu.cc deleted file mode 100644 index 96c1b1b1..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu.cc +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/leaky_relu.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -template -void QuantizeLeakyRelu(const LeakyReluOpData& data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - LeakyReluParams op_params = {}; - - op_params.input_offset = data.input_zero_point; - op_params.output_offset = data.output_zero_point; - op_params.output_multiplier_alpha = data.output_multiplier_alpha; - op_params.output_shift_alpha = data.output_shift_alpha; - op_params.output_multiplier_identity = data.output_multiplier_identity; - op_params.output_shift_identity = data.output_shift_identity; - reference_ops::QuantizeLeakyRelu(op_params, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -} - -void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(LeakyReluOpData)); -} - -TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); - const LeakyReluOpData& data = *static_cast(node->user_data); - - switch (input->type) { - case kTfLiteFloat32: { - LeakyReluParams 
op_params = {}; - const auto* params = - static_cast(node->builtin_data); - - op_params.alpha = params->alpha; - reference_ops::LeakyRelu(op_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - } break; - case kTfLiteInt8: { - QuantizeLeakyRelu(data, input, output); - return kTfLiteOk; - } break; - case kTfLiteInt16: { - QuantizeLeakyRelu(data, input, output); - return kTfLiteOk; - } break; - default: - MicroPrintf("Only float32, int8 are supported by LEAKY_RELU, got %s.", - TfLiteTypeGetName(input->type)); - return kTfLiteError; - } - - return kTfLiteError; -} - -TfLiteRegistration Register_LEAKY_RELU() { - return tflite::micro::RegisterOp(LeakyReluInit, LeakyReluPrepare, - LeakyReluEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu.h deleted file mode 100644 index dfcd6e93..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_ - -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -// Input/output tensor index. -extern const int kInputTensor; -extern const int kOutputTensor; - -struct LeakyReluOpData { - // quantization parameters - int32_t output_multiplier_alpha; - int32_t output_shift_alpha; - int32_t output_multiplier_identity; - int32_t output_shift_identity; - int32_t input_zero_point; - int32_t output_zero_point; -}; - -TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, TfLiteNode* node); - -TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu_common.cc deleted file mode 100644 index 3d1ffebb..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/leaky_relu_common.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/leaky_relu.h" - -namespace tflite { - -// Input/output tensor index. -const int kInputTensor = 0; -const int kOutputTensor = 0; - -TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, - TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); - - if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { - LeakyReluOpData* data = static_cast(node->user_data); - const auto* params = - static_cast(node->builtin_data); - - data->input_zero_point = input->params.zero_point; - data->output_zero_point = output->params.zero_point; - - int output_shift_alpha; - double alpha_multiplier = static_cast( - input->params.scale * params->alpha / output->params.scale); - QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha, - &output_shift_alpha); - data->output_shift_alpha = static_cast(output_shift_alpha); - - int output_shift_identity; - double identity_multiplier = - static_cast(input->params.scale / output->params.scale); - QuantizeMultiplier(identity_multiplier, 
&data->output_multiplier_identity, - &output_shift_identity); - data->output_shift_identity = static_cast(output_shift_identity); - } - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) { - return CalculateOpDataLeakyRelu(context, node); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical.cc deleted file mode 100644 index c85e0c5b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/micro/kernels/logical.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/reference/binary_function.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { -namespace { - -TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) { - return LogicalImpl(context, node, LogicalOr); -} - -TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) { - return LogicalImpl(context, node, LogicalAnd); -} - -} // namespace - -TfLiteRegistration Register_LOGICAL_OR() { - return tflite::micro::RegisterOp(nullptr, nullptr, LogicalOrEval); -} - -TfLiteRegistration Register_LOGICAL_AND() { - return tflite::micro::RegisterOp(nullptr, nullptr, LogicalAndEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical.h deleted file mode 100644 index e70e4576..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -namespace tflite { -// Input/output tensor index. -extern const int kLogicalInputTensor1; -extern const int kLogicalInputTensor2; -extern const int kLogicalOutputTensor; - -TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node, - bool (*func)(bool, bool)); - -bool LogicalOr(bool x, bool y); -bool LogicalAnd(bool x, bool y); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical_common.cc deleted file mode 100644 index 2612d3a4..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logical_common.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/reference/binary_function.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/logical.h" - -namespace tflite { - -// Input/output tensor index. -const int kLogicalInputTensor1 = 0; -const int kLogicalInputTensor2 = 1; -const int kLogicalOutputTensor = 0; - -TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node, - bool (*func)(bool, bool)) { - const TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, kLogicalInputTensor1); - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, kLogicalInputTensor2); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kLogicalOutputTensor); - - if (tflite::micro::HaveSameShapes(input1, input2)) { - reference_ops::BinaryFunction( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output), func); - } else { - reference_ops::BroadcastBinaryFunction4DSlow( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output), func); - } - - return kTfLiteOk; -} - -bool LogicalOr(bool x, bool y) { return x || y; } - -bool LogicalAnd(bool x, bool y) { return x && y; } - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic.cc deleted file mode 100644 index f8ac1c23..00000000 --- 
a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/logistic.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/logistic.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { -namespace { - -void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic)); -} - -TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kLogisticInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor); - 
- TFLITE_DCHECK(node->user_data != nullptr); - OpDataLogistic* data = static_cast(node->user_data); - - if (input->type == kTfLiteFloat32) { - switch (output->type) { - case kTfLiteFloat32: { - reference_ops::Logistic(tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - } - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else if (input->type == kTfLiteInt16) { - switch (output->type) { - case kTfLiteInt16: { - reference_integer_ops::Logistic( - data->input_multiplier, data->input_left_shift, - NumElements(input->dims), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - } - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else if (input->type == kTfLiteInt8) { - switch (output->type) { - case kTfLiteInt8: { - reference_integer_ops::Logistic( - data->input_zero_point, data->input_range_radius, - data->input_multiplier, data->input_left_shift, - NumElements(input->dims), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - } - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else { - // TODO(b/141211002): Also support other data types once we have supported - // temporary tensors in TFLM. 
- MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - return kTfLiteOk; -} - -} // namespace - -TfLiteRegistration Register_LOGISTIC() { - return tflite::micro::RegisterOp(LogisticInit, LogisticPrepare, LogisticEval); -} -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic.h deleted file mode 100644 index 1de0cdab..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic.h +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -namespace tflite { -extern const int kLogisticInputTensor; -extern const int kLogisticOutputTensor; - -struct OpDataLogistic { - int32_t input_zero_point; - int32_t input_range_radius; - int32_t input_multiplier; - int input_left_shift; -}; - -TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context, - TfLiteNode* node, - OpDataLogistic* data); - -TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node); - -} // namespace tflite -#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic_common.cc deleted file mode 100644 index a79fd6bb..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/logistic_common.cc +++ /dev/null @@ -1,119 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" -#include "tensorflow/lite/kernels/internal/reference/logistic.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/logistic.h" - -namespace tflite { -const int kLogisticInputTensor = 0; -const int kLogisticOutputTensor = 0; - -TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context, - TfLiteNode* node, - OpDataLogistic* data) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLogisticInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kLogisticOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); - if (input->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, - std::numeric_limits::min()); - - static constexpr int kInputIntegerBits = 4; - const double input_real_multiplier = - static_cast(input->params.scale) * - static_cast(1 << (31 - kInputIntegerBits)); - - data->input_zero_point = input->params.zero_point; - - const double q = std::frexp(input_real_multiplier, &data->input_left_shift); - data->input_multiplier = static_cast(TfLiteRound(q * (1ll << 31))); - - data->input_range_radius = - CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31); - } - - if (input->type == kTfLiteInt16) { - static constexpr int 
kInputIntegerBits = 3; - static constexpr int kOutputFractionalBits = 15; - - // See comments in TanhPrepare about requiring zero_point==0 - // and a power-of-two ("POT") scale. - - TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0); - TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); - - int input_scale_log2_rounded; - bool param_scale_pot = - CheckedLog2(input->params.scale, &input_scale_log2_rounded); - - data->input_left_shift = - (15 - kInputIntegerBits) + input_scale_log2_rounded; - param_scale_pot &= (data->input_left_shift == 0); - - if (param_scale_pot) { - data->input_multiplier = 0; - } else { - // Calculate multiplier to change input scale to 1/(3*4096) - // as required by the table lookup. - // In this scaling +/-2^17 represents +/-10.7 - double multiplier = - static_cast(input->params.scale) * 4096.0 * 3.0; - - data->input_left_shift = 0; - - while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) { - data->input_left_shift++; - multiplier = multiplier * 2.0; - } - - data->input_multiplier = static_cast(multiplier); - } - - int output_scale_log2_rounded; - TF_LITE_ENSURE( - context, CheckedLog2(output->params.scale, &output_scale_log2_rounded)); - TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded, - -kOutputFractionalBits); - } - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - OpDataLogistic* data = static_cast(node->user_data); - - return CalculateArithmeticOpDataLogistic(context, node, data); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_eval.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_eval.h deleted file mode 100644 index 218b4938..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_eval.h +++ /dev/null @@ -1,250 +0,0 
@@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ - -#include -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -// Pamameters for integer LSTM. -// Consider split this into two Integer Parameters if more fields are added. 
-struct IntegerLstmParameter { - int32_t effective_input_to_input_scale_a; - int32_t effective_input_to_input_scale_b; - int32_t effective_recurrent_to_input_scale_a; - int32_t effective_recurrent_to_input_scale_b; - int32_t effective_cell_to_input_scale_a; - int32_t effective_cell_to_input_scale_b; - int32_t effective_input_to_forget_scale_a; - int32_t effective_input_to_forget_scale_b; - int32_t effective_recurrent_to_forget_scale_a; - int32_t effective_recurrent_to_forget_scale_b; - int32_t effective_cell_to_forget_scale_a; - int32_t effective_cell_to_forget_scale_b; - int32_t effective_input_to_cell_scale_a; - int32_t effective_input_to_cell_scale_b; - int32_t effective_recurrent_to_cell_scale_a; - int32_t effective_recurrent_to_cell_scale_b; - int32_t effective_input_to_output_scale_a; - int32_t effective_input_to_output_scale_b; - int32_t effective_recurrent_to_output_scale_a; - int32_t effective_recurrent_to_output_scale_b; - int32_t effective_cell_to_output_scale_a; - int32_t effective_cell_to_output_scale_b; - int32_t effective_proj_scale_a; - int32_t effective_proj_scale_b; - int32_t effective_hidden_scale_a; - int32_t effective_hidden_scale_b; - int32_t layer_norm_input_scale_a; - int32_t layer_norm_input_scale_b; - int32_t layer_norm_forget_scale_a; - int32_t layer_norm_forget_scale_b; - int32_t layer_norm_cell_scale_a; - int32_t layer_norm_cell_scale_b; - int32_t layer_norm_output_scale_a; - int32_t layer_norm_output_scale_b; - // Quantized clip value for cell and projection. Zero value means no clipping. - int16_t quantized_cell_clip; - int8_t quantized_proj_clip; - int32_t hidden_zp; - int32_t cell_scale; - - int32_t input_variance_guard; - int32_t forget_variance_guard; - int32_t cell_variance_guard; - int32_t output_variance_guard; - - // Pre-calculate bias + zero_point * weight. 
- int32_t* input_to_forget_effective_bias; - int32_t* recurrent_to_forget_effective_bias; - int32_t* input_to_cell_effective_bias; - int32_t* recurrent_to_cell_effective_bias; - int32_t* input_to_output_effective_bias; - int32_t* recurrent_to_output_effective_bias; - int32_t* input_to_input_effective_bias; - int32_t* recurrent_to_input_effective_bias; - int32_t* projection_effective_bias; - - // Scale and zero point for intermediate tensors. - // Used only in the 8x8_8 case. - int32_t intermediate_scale_a[8]; - int32_t intermediate_scale_b[8]; - int32_t intermediate_zp[12]; -}; - -// Scales for hybrid op with integer inputs and float weights -struct HybridLstmScales { - float input_to_input_weights_scale; - float input_to_forget_weights_scale; - float input_to_cell_weights_scale; - float input_to_output_weights_scale; - float aux_input_to_input_weights_scale; - float aux_input_to_forget_weights_scale; - float aux_input_to_cell_weights_scale; - float aux_input_to_output_weights_scale; - float recurrent_to_input_weights_scale; - float recurrent_to_forget_weights_scale; - float recurrent_to_cell_weights_scale; - float recurrent_to_output_weights_scale; - float cell_to_input_weights_scale; - float cell_to_forget_weights_scale; - float cell_to_output_weights_scale; - float projection_weights_scale; -}; - -TfLiteStatus EvalFloatLstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* 
input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* aux_input, - const TfLiteEvalTensor* aux_input_to_input_weights, - const TfLiteEvalTensor* aux_input_to_forget_weights, - const TfLiteEvalTensor* aux_input_to_cell_weights, - const TfLiteEvalTensor* aux_input_to_output_weights, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, int output_offset, - float* scratch_buffer, TfLiteEvalTensor* output_state, - TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output); - -TfLiteStatus EvalHybridLstm( - const HybridLstmScales* hybrid_lstm_scales, const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_input_weights_ledger, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_forget_weights_ledger, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_cell_weights_ledger, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* input_to_output_weights_ledger, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_input_weights_ledger, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights_ledger, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights_ledger, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* recurrent_to_output_weights_ledger, - const TfLiteEvalTensor* cell_to_input_weights, - const 
TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* aux_input, - const TfLiteEvalTensor* aux_input_to_input_weights, - const TfLiteEvalTensor* aux_input_to_forget_weights, - const TfLiteEvalTensor* aux_input_to_cell_weights, - const TfLiteEvalTensor* aux_input_to_output_weights, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_weights_ledger, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, int output_offset, - float* scratch_buffer, float* input_sf, float* aux_input_sf, - float* output_state_sf, float* prod_scaling_factors, - float* recovered_cell_weights, int8_t* input_quantized, - int8_t* aux_input_quantized, int8_t* output_state_quantized, - int8_t* cell_state_quantized, float* scales, TfLiteEvalTensor* output_state, - TfLiteEvalTensor* cell_state, int32_t* output_scratch_buffer, - TfLiteEvalTensor* output, int32_t* input_zp, int32_t* aux_input_zp, - int32_t* output_state_zp, int32_t* row_sums, int row_sums_size, - bool* compute_row_sums); - -TfLiteStatus EvalInteger8x8_16Lstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* 
recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - bool forward_sequence, bool time_major, - const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp, - TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state, - TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1, - int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5); - -TfLiteStatus EvalInteger8x8_8Lstm( - const TfLiteEvalTensor* input, - const TfLiteEvalTensor* input_to_input_weights, - const TfLiteEvalTensor* input_to_forget_weights, - const TfLiteEvalTensor* input_to_cell_weights, - const TfLiteEvalTensor* input_to_output_weights, - const TfLiteEvalTensor* recurrent_to_input_weights, - const TfLiteEvalTensor* recurrent_to_forget_weights, - const TfLiteEvalTensor* recurrent_to_cell_weights, - const TfLiteEvalTensor* recurrent_to_output_weights, - const TfLiteEvalTensor* cell_to_input_weights, - const TfLiteEvalTensor* cell_to_forget_weights, - const TfLiteEvalTensor* cell_to_output_weights, - const TfLiteEvalTensor* input_layer_norm_coefficients, - const TfLiteEvalTensor* forget_layer_norm_coefficients, - const TfLiteEvalTensor* cell_layer_norm_coefficients, - const TfLiteEvalTensor* output_layer_norm_coefficients, - const TfLiteEvalTensor* input_gate_bias, - const TfLiteEvalTensor* forget_gate_bias, - const 
TfLiteEvalTensor* cell_gate_bias, - const TfLiteEvalTensor* output_gate_bias, - const TfLiteEvalTensor* projection_weights, - const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params, - TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state, - TfLiteEvalTensor* output, const IntegerLstmParameter* integer_lstm_param, - int8_t* scratch0, int8_t* scratch1, int16_t* scratch2, int16_t* scratch3, - int16_t* scratch4, int16_t* scratch5, int16_t* scratch6, int16_t* scratch7); - -} // namespace tflite -#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_shared.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_shared.h deleted file mode 100644 index ee34b848..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_shared.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ - -namespace tflite { - -// Input Tensors of size {n_batch, n_input} -constexpr int kLstmInputTensor = 0; - -// Input weight tensors of size: {n_cell, n_input} -constexpr int kLstmInputToInputWeightsTensor = 1; // Optional -constexpr int kLstmInputToForgetWeightsTensor = 2; -constexpr int kLstmInputToCellWeightsTensor = 3; -constexpr int kLstmInputToOutputWeightsTensor = 4; - -// Recurrent weight tensors of size {n_cell, n_output} -constexpr int kLstmRecurrentToInputWeightsTensor = 5; // Optional -constexpr int kLstmRecurrentToForgetWeightsTensor = 6; -constexpr int kLstmRecurrentToCellWeightsTensor = 7; -constexpr int kLstmRecurrentToOutputWeightsTensor = 8; - -// Peephole weights tensors of size {n_cell}, representing a diagonal matrix. -constexpr int kLstmCellToInputWeightsTensor = 9; // Optional -constexpr int kLstmCellToForgetWeightsTensor = 10; // Optional -constexpr int kLstmCellToOutputWeightsTensor = 11; // Optional - -// Gates bias tensors of size {n_cell} -constexpr int kLstmInputGateBiasTensor = 12; // Optional -constexpr int kLstmForgetGateBiasTensor = 13; -constexpr int kLstmCellGateBiasTensor = 14; -constexpr int kLstmOutputGateBiasTensor = 15; - -// Projection weight tensor of size {n_output, n_cell} -constexpr int kLstmProjectionWeightsTensor = 16; // Optional -// Projection bias tensor of size {n_output} -constexpr int kLstmProjectionBiasTensor = 17; // Optional - -// These state tensors are defined as variable tensors, and will be modified by -// this op. -constexpr int kLstmOutputStateTensor = 18; -constexpr int kLstmCellStateTensor = 19; - -// Layer norm coefficient tensors of size {n_cell}, representing a diagonal -// matrix. 
-constexpr int kLstmInputLayerNormCoefficientsTensor = 20; // Optional -constexpr int kLstmForgetLayerNormCoefficientsTensor = 21; // Optional -constexpr int kLstmCellLayerNormCoefficientsTensor = 22; // Optional -constexpr int kLstmOutputLayerNormCoefficientsTensor = 23; // Optional - -// Output tensors. -constexpr int kLstmOutputTensor = 0; - -} // namespace tflite -#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/micro_tensor_utils.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/micro_tensor_utils.h deleted file mode 100644 index 673ba6a3..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/micro_tensor_utils.h +++ /dev/null @@ -1,874 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This file and the associated .cc file is branched from -// tensorflow/lite/kernels/internal/reference/portable_tensor_utils* -// TFLM needs to create its own because the original files are coupled with -// the tensor_utils module, which we cannot reuse due to its use of the -// Eigen library. 
- -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_ - -#include -#include -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -#if defined(_MSC_VER) -#define __restrict__ __restrict -#endif - -namespace tflite { - -// Not all backends support CpuBackendContext usage, so forward declare to avoid -// pulling in its implementation. -// TODO(b/230666277): consider removing this since micro does not utilize it -class CpuBackendContext; - -namespace micro_tensor_utils { - -template -inline bool PortableIsZeroVector(const T* vector, int v_size) { - for (int i = 0; i < v_size; ++i) { - if (vector[i] != 0) { - return false; - } - } - return true; -} - -void PortableSymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* min_value, - float* max_value, float* scaling_factor); - -void PortableSymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float min_value, - float max_value, float* scaling_factor); - -void PortableAsymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, - float* scaling_factor, int32_t* offset); - -// Multiply a matrix by a batch vector, and store results in a batch-size -// vector. 
-void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix, - int m_rows, int m_cols, - const float* vector, - int n_batch, float* result); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result, const float* per_channel_scale, - const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, - bool* compute_row_sums, CpuBackendContext* context); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vector, const float* scaling_factors, - int n_batch, int32_t* scratch, float* __restrict__ result, - CpuBackendContext* context); - -void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4( - const float* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const float* __restrict__ vector, int n_batch, float* __restrict__ result); - -void PortableSparseMatrixBatchVectorMultiplyAccumulate( - const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, - int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, - float* __restrict__ result); - -void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16( - const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, - int n_batch, const int32_t input_offset, const int32_t output_multiplier, - const int32_t output_shift, const int32_t output_offset, - const 
int32_t output_activation_min, const int32_t output_activation_max, - int8_t* __restrict__ result); - -void PortableSparseMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows, - const int m_cols, const int8_t* __restrict__ vectors, - const float* scaling_factors, int n_batch, float* __restrict__ result); - -// Dot product of two vectors. -float PortableVectorVectorDotProduct(const float* vector1, const float* vector2, - int v_size); - -void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1, - const int16_t* vector2, - int v_size, int n_batch, - int32_t* result); - -void PortableVectorBatchVectorCwiseProductAccumulate( - const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, - int32_t multiplier, int shift, int16_t* result); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int16_t* output, CpuBackendContext* context); - -void PortableMatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int8_t* output, CpuBackendContext* context); - -void PortableMatrixBatchVectorMultiply(const int8_t* input, - int32_t input_zeropoint, - const int8_t* input_to_gate_weights, - int32_t input_to_gate_effective_scale_a, - int32_t input_to_gate_effective_scale_b, - int32_t n_batch, int32_t n_input, - int32_t n_cell, int8_t* gate_output, - int8_t gate_output_zp); - -void PortableMatrixBatchVectorMultiply( - const int16_t* hidden, const int8_t* hidden_to_output_weights, - int32_t proj_effective_scale_a, int32_t proj_effective_scale_b, - const int32_t* gate_bias, int32_t n_batch, int32_t 
n_hidden, - int32_t n_output, int32_t output_zp, int8_t* proj_output); - -void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix, - int32_t scalar, int32_t n_row, - int32_t n_col, int32_t* output); - -void PortableApplyLayerNorm(const int16_t* input, - const int16_t* layer_norm_weights, - const int32_t* bias, int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, int32_t variance_limit, - int n_batch, int n_input, int16_t* output); - -void PortableApplyLayerNormFloat(const int16_t* input, - const int16_t* layer_norm_weights, - int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, - const int32_t* bias, int n_batch, int n_input, - int16_t* output); - -void PortableApplySigmoid(const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output); - -void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output); - -void PortableApplyTanh(int32_t integer_bits, const int16_t* input, - int32_t n_batch, int32_t n_input, int16_t* output); - -void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch, - int32_t n_input, int32_t integer_bits, - int16_t* output); - -void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, - int n_batch, int n_input, int shift, int16_t* output); - -void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, - int32_t multiplier, int32_t shift, int32_t n_batch, - int32_t n_input, int32_t output_zp, int8_t* output); - -void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2, - int n_batch, int n_input, int16_t* output); - -template -inline void PortableCwiseClipping(T* vector, const int v_size, - const T& clipping_value) { - for (int i = 0; i < v_size; i++) { - vector[i] = std::max(std::min(clipping_value, vector[i]), - static_cast(-clipping_value)); - } -} - -// Batch vector initialization with another vector. 
-void PortableVectorBatchVectorAssign(const float* vector, int v_size, - int n_batch, float* batch_vector); - -// Compute "1.0f - elements of vector" (used in CIFG). -void PortableSub1Vector(const float* vector, int v_size, float* result); - -void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result); - -// Multiply all elements of vector with a scalar. -void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale, - float* result); - -// Reduce-sum on a vector: -// input_vector: pointer to input vector. -// output_vector: pointer to vector. -// output_size: output vector size. -// reduction_size: number of consecutive elements from input vector which are -// added to get one element of output. -template -inline void PortableReductionSumVector(const INPUT* input_vector, - OUTPUT* output_vector, int output_size, - int reduction_size) { - for (int o = 0; o < output_size; o++) { - OUTPUT result = 0; - for (int r = 0; r < reduction_size; r++) { - result += input_vector[r]; - } - output_vector[o] = result; - input_vector += reduction_size; - } -} - -// Layer norm for each batch. -void PortableMeanStddevNormalization(const float* __restrict__ input_vector, - float* __restrict__ output_vector, - int v_size, int n_batch); - -// Saturate Add. -void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, - const int8_t* recurrent, int8_t recurrent_zp, - int32_t input_effective_scale_a, - int32_t input_effective_scale_b, - int32_t recurrent_effective_scale_a, - int32_t recurrent_effective_scale_b, - int32_t n_batch, int32_t n_cell, - int16_t* output); - -// Add another vector for each batch in the batch vector. -template -inline void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch, - T* batch_vector) { - for (int b = 0; b < n_batch; b++) { - for (int i = 0; i < v_size; ++i) { - batch_vector[i] += vector[i]; - } - batch_vector += v_size; - } -} - -// Cwise product of two vectors. 
-template -inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2, - int v_size, T* result) { - for (int v = 0; v < v_size; v++) { - *result++ = *vector1++ * *vector2++; - } -} - -// Cwise product of a vector and a batch-vector. -template -inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size, - const T* batch_vector, int n_batch, - T* result) { - for (int b = 0; b < n_batch; b++) { - VectorVectorCwiseProduct(vector, batch_vector, v_size, result); - // Update the pointers. - result += v_size; - batch_vector += v_size; - } -} - -// Reduce-sum on a float input vector: -// input_vector: float pointer to input vector. -// output_vector: float pointer to vector. -// output_size: output vector size. -// reduction_size: number of consecutive elements from input vector which are -// added to get one element of output. -inline void ReductionSumVector(const float* input_vector, float* output_vector, - int output_size, int reduction_size) { - PortableReductionSumVector(input_vector, output_vector, output_size, - reduction_size); -} - -// Same as above but input/output is 32 bit integer. -inline void ReductionSumVector(const int32_t* input_vector, - int32_t* output_vector, int output_size, - int reduction_size) { - PortableReductionSumVector(input_vector, output_vector, output_size, - reduction_size); -} - -// Same as above but input is 8 bit integer. -inline void ReductionSumVector(const int8_t* input_vector, - int32_t* output_vector, int output_size, - int reduction_size) { - PortableReductionSumVector(input_vector, output_vector, output_size, - reduction_size); -} - -// Cwise product and accumulate of two vectors. Since it's a MAC operation, the -// assumption here is that result array is initialized to valid values. 
-template -inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1, - const T* __restrict__ vector2, - int v_size, - T* __restrict__ result) { - for (int v = 0; v < v_size; v++) { - *result++ += *vector1++ * *vector2++; - } -} - -// Batch vector initialization with another vector. -template -inline void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch, - T* batch_vector) { - for (int b = 0; b < n_batch; b++) { - std::copy_n(vector, v_size, batch_vector + b * v_size); - } -} - -inline void SymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float* min, - float* max, float* scaling_factor) { - PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max, - scaling_factor); -} - -inline void SymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, float min_value, - float max_value, float* scaling_factor) { - PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value, - max_value, scaling_factor); -} - -inline void AsymmetricQuantizeFloats(const float* values, const int size, - int8_t* quantized_values, - float* scaling_factor, int32_t* offset) { - PortableAsymmetricQuantizeFloats(values, size, quantized_values, - scaling_factor, offset); -} - -// Helper function to quantize floats. -// float_data_ptr input float vectors -// n_batch number of input vectors -// n_data size of a single input vector -// quantized_data_ptr (out) vector with quantized data -// scaling_factors (out) scaling factors (one per vector) -// zero_points (out) zero points (one per vector) -// do_asymmetric controls if the quantization should be asymmetric. 
-inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch, - int n_data, int8_t* quantized_data_ptr, - float* scaling_factors, int32_t* zero_points, - bool do_asymmetric) { - for (int b = 0; b < n_batch; ++b) { - const int offset = b * n_data; - if (do_asymmetric) { - AsymmetricQuantizeFloats(float_data_ptr + offset, n_data, - quantized_data_ptr + offset, &scaling_factors[b], - &zero_points[b]); - } else { - float unused_min, unused_max; - SymmetricQuantizeFloats(float_data_ptr + offset, n_data, - quantized_data_ptr + offset, &unused_min, - &unused_max, &scaling_factors[b]); - } - } -} - -// Check if all entries of a vector are zero for float. -inline bool IsZeroVector(const float* vector, int v_size) { - return PortableIsZeroVector(vector, v_size); -} - -// Check if all entries of a vector are zero for int8_t. -inline bool IsZeroVector(const int8_t* vector, int v_size) { - return PortableIsZeroVector(vector, v_size); -} - -// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized -// vector. -// Parameters: -// - input: batch vector of size n_batch * n_input; 16 bit. -// - layer_norm_weights: the quantized layer normalization weights. -// - bias: the bias for the layer normalization. -// - layer_norm_scale_a: multiplier for scale factor. -// - layer_norm_scale_b: shift for scale factor. -// - variance_limit: the guard to make sure the inverse does not overflow. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 16 bit output -inline void ApplyLayerNorm(const int16_t* input, - const int16_t* layer_norm_weights, - const int32_t* bias, int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, int32_t variance_limit, - int n_batch, int n_input, int16_t* output) { - PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a, - layer_norm_scale_b, variance_limit, n_batch, n_input, - output); -} - -// Same as above but the internal calculation is done in float. 
-inline void ApplyLayerNormFloat(const int16_t* input, - const int16_t* layer_norm_weights, - int32_t layer_norm_scale_a, - int32_t layer_norm_scale_b, const int32_t* bias, - int n_batch, int n_input, int16_t* output) { - PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a, - layer_norm_scale_b, bias, n_batch, n_input, - output); -} - -// Apply Sigmoid to a quantized vector. -// Parameters: -// - input: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 16 bit output -// The input is in Q3.12 format and the output is in Q0.15 format. -inline void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input, - int16_t* output) { - PortableApplySigmoid(input, n_batch, n_input, output); -} - -// Same as above but the internal calcualtion is float. -inline void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, - int32_t n_input, int16_t* output) { - PortableApplySigmoidFloat(input, n_batch, n_input, output); -} - -// Apply Tanh to a quantized vector. -// Parameters: -// - integer_bits: the integer bits of the input. -// Currently supports 0, 1, 2, 3, 4, 5, 6. -// - input: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 16 bit output -// The input is in Qm.15-m format and the output is in Q0.15 format. -inline void ApplyTanh(int32_t integer_bits, const int16_t* input, - int32_t n_batch, int32_t n_input, int16_t* output) { - PortableApplyTanh(integer_bits, input, n_batch, n_input, output); -} - -// Apply Tanh to a quantized vector. Tbe internal calculation is in float. -// - Input has 2^(integer_bits) as scale. -// - Output has Q0.15 as scale. 
-inline void ApplyTanhFloat(const int16_t* input, int32_t n_batch, - int32_t n_input, int32_t integer_bits, - int16_t* output) { - PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output); -} - -// Element-wise multiplication of two quantized vectors. -// Parameters: -// - input_1: batch vector of size n_batch * n_input; 16 bit. -// - input_2: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - shift: the shift needed to produce the output. -// - output: the 16 bit output of size n_batch * n_input. -// Output does not need to be initialized. -inline void CwiseMul(const int16_t* input_1, const int16_t* input_2, - int n_batch, int n_input, int shift, int16_t* output) { - PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output); -} - -// Element-wise multiplication of two quantized vectors with rescaling. -// Parameters: -// - input_1: batch vector of size n_batch * n_input; 16 bit. -// - input_2: batch vector of size n_batch * n_input; 16 bit. -// - multiplier: the multiplier part of scale. -// - shift: the shift part of scale. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 8 bit output of size n_batch * n_input. -// - output_zp: the zero point of output. -// Output does not need to be initialized. -// Multiplier ("m") and shift ("s") are connected to scale ("s") with s = m * -// 2^(s - 31). -inline void CwiseMul(const int16_t* input_1, const int16_t* input_2, - int32_t multiplier, int32_t shift, int32_t n_batch, - int32_t n_input, int32_t output_zp, int8_t* output) { - PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input, - output_zp, output); -} - -// Element-wise in-place clipping of a vector. Overloaded for float, int16_t, -// int8_t. Parameters: -// - vector: vector of size v_size. -// - v_size: the size of the vector. -// - clipping_value: the value used for clipping. 
-inline void CwiseClipping(float* vector, const int v_size, - const float clipping_value) { - PortableCwiseClipping(vector, v_size, clipping_value); -} - -inline void CwiseClipping(int16_t* vector, const int v_size, - const int16_t clipping_value) { - PortableCwiseClipping(vector, v_size, clipping_value); -} - -inline void CwiseClipping(int8_t* vector, const int v_size, - const int8_t clipping_value) { - PortableCwiseClipping(vector, v_size, clipping_value); -} - -// Element-wise saturating addition of two quantized vectors without rescaling. -// Parameters: -// - input_1: batch vector of size n_batch * n_input; 16 bit. -// - input_2: batch vector of size n_batch * n_input; 16 bit. -// - n_batch: the number of batches. -// - n_input: the size for input and output. -// - output: the 8 bit output of size n_batch * n_input. -// Output does not need to be initialized. -inline void CwiseAdd(const int16_t* input_1, const int16_t* input_2, - int n_batch, int n_input, int16_t* output) { - PortableCwiseAdd(input_1, input_2, n_batch, n_input, output); -} - -inline void MeanStddevNormalization(const float* input_vector, - float* output_vector, int v_size, - int n_batch) { - PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch); -} - -inline void Sub1Vector(const float* vector, int v_size, float* result) { - PortableSub1Vector(vector, v_size, result); -} - -inline void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) { - PortableSub1Vector(vector, v_size, result); -} - -// Multiply all elements of vector with a scalar. -inline void VectorScalarMultiply(const int8_t* vector, int v_size, float scale, - float* result) { - PortableVectorScalarMultiply(vector, v_size, scale, result); -} - -// Saturate Add with rescale on both inputs. 
-inline void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, - const int8_t* recurrent, int8_t recurrent_zp, - int32_t input_effective_scale_a, - int32_t input_effective_scale_b, - int32_t recurrent_effective_scale_a, - int32_t recurrent_effective_scale_b, - int32_t n_batch, int32_t n_cell, - int16_t* output) { - PortableTwoGateSaturatingAdd( - input, input_zp, recurrent, recurrent_zp, input_effective_scale_a, - input_effective_scale_b, recurrent_effective_scale_a, - recurrent_effective_scale_b, n_batch, n_cell, output); -} - -// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch -// dimension composed by input vectors independent from each other). The result -// of the multiplication is accumulated to the passed result buffer. -// More specifically, for a matrix M of shape [n, i] and a batched-vector -// of shape [i, batch] it will first compute the product of shape [n, batch]. -// This product will be accumulated to the result buffer. -inline void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows, - int m_cols, const float* vector, - int n_batch, float* result) { - PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, - n_batch, result); -} - -// Same as the function above, but the matrix is a sparse tensor with block -// pattern 1x4. -// This function assumes that m_cols is a multiple of the block size (4 in this -// case) so that there's no incomplete block. 
-inline void MatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vector, const float* scaling_factors, - int n_batch, float* __restrict__ result) { - PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, - scaling_factors, n_batch, result); -} - -inline void MatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float* scaling_factors, - int n_batch, float* __restrict__ result, const float* per_channel_scale, - const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, - bool* compute_row_sums, CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulate( - matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result, - per_channel_scale, input_offset, scratch, row_sums, compute_row_sums, - context); -} - -inline void MatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vector, const float* scaling_factors, - int n_batch, int32_t* scratch, float* __restrict__ result, - CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, - scaling_factors, n_batch, result); -} - -// Same as the function above, but the matrix is a sparse tensor with block -// pattern 1x4. -// This function assumes that m_cols is a multiple of the block size (4 in this -// case) so that there's no incomplete block. 
-inline void SparseMatrixBatchVectorMultiplyAccumulate1x4( - const float* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const float* __restrict__ vector, int n_batch, float* __restrict__ result) { - PortableSparseMatrixBatchVectorMultiplyAccumulate1x4( - matrix, segments, indices, m_rows, m_cols, vector, n_batch, result); -} - -// Same as the function above, but the matrix is stored in block compressed -// sparse row format with block pattern 1x16 which consists of two arrays: -// 1. A matrix array stores non-zero blocks of the matrix in row major. -// 2. A ledger array stores nrows groups, one group per row. Each group starts -// with an integer representing the number of non-zero blocks for the -// corresponding row and follows with column indexes of the first element -// of each non-zero block. -// This function assumes that -// 1. m_cols is a multiple of 16 so that all blocks are full blocks. -// 2. m_cols < 254 * 16 so that block index can be represented by uint8. -inline void SparseMatrixBatchVectorMultiplyAccumulate( - const float* __restrict__ matrix, const uint8_t* __restrict__ ledger, - int m_rows, int m_cols, const float* __restrict__ vector, int n_batch, - float* __restrict__ result) { - PortableSparseMatrixBatchVectorMultiplyAccumulate( - matrix, ledger, m_rows, m_cols, vector, n_batch, result); -} - -// Same as the function above, but the matrix is a sparse tensor with block -// pattern 1x16. -// This function assumes that m_cols is a multiple of the block size (16 in this -// case) so that there's no incomplete block. Also, it assumes all offsets of -// input, output and filter are zero. 
-inline void SparseMatrixBatchVectorMultiplyAccumulate1x16( - const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments, - const int32_t* __restrict__ indices, int m_rows, int m_cols, - const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector, - int n_batch, const int32_t input_offset, const int32_t output_multiplier, - const int32_t output_shift, const int32_t output_offset, - const int32_t output_activation_min, const int32_t output_activation_max, - int8_t* __restrict__ result) { - PortableSparseMatrixBatchVectorMultiplyAccumulate1x16( - matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch, - input_offset, output_multiplier, output_shift, output_offset, - output_activation_min, output_activation_max, result); -} - -// Same as the function above, but the matrix is stored in block compressed -// sparse row format with block pattern 1x16 which consists of two arrays: -// 1. A matrix array stores non-zero blocks of the matrix in row major. -// 2. A ledger array stores nrows groups, one group per row. Each group starts -// with an integer representing the number of non-zero blocks for the -// corresponding row followed by column index of the first element of -// each non-zero block. -// This function assumes that -// 1. m_cols is a multiple of 16 so that all blocks are full blocks. -// 2. m_cols < 254 * 16 so that block index can be represented by uint8. -inline void SparseMatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows, - const int m_cols, const int8_t* __restrict__ vectors, - const float* scaling_factors, int n_batch, float* __restrict__ result) { - PortableSparseMatrixBatchVectorMultiplyAccumulate( - matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch, - result); -} - -// Same as the above 8, 8, 8 integer matmul except for the presence of zero -// point and non-accumulative. 
-// TODO(b/148688698): remove this function by folding zero point calculation in -// prepare() function. -inline void MatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int16_t* output, CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulate( - input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input, - n_output, output_zp, scratch, output, context); -} - -// Same as above but has 16 bit and 8 bit input and 8 bit output. -// Used in projection when hidden is 16bit. -inline void MatrixBatchVectorMultiplyAccumulate( - const int8_t* input, const int32_t* bias, - const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift, - int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp, - int32_t* scratch, int8_t* output, CpuBackendContext* context) { - PortableMatrixBatchVectorMultiplyAccumulate( - input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input, - n_output, output_zp, scratch, output, context); -} - -// Same as the function above, but provides separate scaling factor for the -// matrix and the vectors. The scaling factors are multiplied in the -// scaling_factor_scratch buffer. 
-inline void MatrixBatchVectorMultiplyAccumulate( - const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, - const int8_t* __restrict__ vectors, const float matrix_scaling_factor, - const float* vector_scaling_factors, int n_batch, - float* __restrict__ result, const float* per_channel_scale, - const int32_t* input_offset, int32_t* scratch, int32_t* row_sums, - bool* compute_row_sums, float* scaling_factor_scratch, - CpuBackendContext* context) { - for (int b = 0; b < n_batch; ++b) { - scaling_factor_scratch[b] = - vector_scaling_factors[b] * matrix_scaling_factor; - } - MatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vectors, - scaling_factor_scratch, n_batch, result, - per_channel_scale, input_offset, scratch, - row_sums, compute_row_sums, context); -} - -// Multiplies a matrix with a scalar and reduce the result on each row to a -// scalar. -// Parameters: -// - matrix: matrix of size n_row * n_col -// - scalar: the scalar that is multiplied to each element in the matrix -// - n_row: the row count of the matrix -// - n_col: the column count of the matrix -// - output: the 32bit output -// Note: We do not need saturation because the int8 * int8 is safe from overflow -// in (2^31-1) / (2^14) = 131072, which is bigger than the n_row. Non-zero -// initial output value is not exceptionally large. -inline void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar, - int32_t n_row, int32_t n_col, - int32_t* output) { - PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output); -} - -// Same as the above 8, 8, 8 integer matmul except for the presence of zero -// point and non-accumulative. -// TODO(b/148688698): remove this function by folding zero point calculation in -// prepare() function. 
-inline void MatrixBatchVectorMultiply(const int8_t* input, - int32_t input_zeropoint, - const int8_t* input_to_gate_weights, - int32_t input_to_gate_effective_scale_a, - int32_t input_to_gate_effective_scale_b, - int32_t n_batch, int32_t n_input, - int32_t n_cell, int8_t* gate_output, - int8_t gate_output_zp) { - PortableMatrixBatchVectorMultiply( - input, input_zeropoint, input_to_gate_weights, - input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch, - n_input, n_cell, gate_output, gate_output_zp); -} - -// Same as above but has 16 bit and 8 bit input and 8 bit output. -// Used in projection when hidden is 16bit. -inline void MatrixBatchVectorMultiply(const int16_t* hidden, - const int8_t* hidden_to_output_weights, - int32_t proj_effective_scale_a, - int32_t proj_effective_scale_b, - const int32_t* gate_bias, int32_t n_batch, - int32_t n_hidden, int32_t n_output, - int32_t output_zp, int8_t* proj_output) { - PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights, - proj_effective_scale_a, - proj_effective_scale_b, gate_bias, n_batch, - n_hidden, n_output, output_zp, proj_output); -} - -// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC -// operation, the assumption here is that result array is initialized to valid -// values. -template -inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size, - const T* batch_vector, - int n_batch, T* result) { - for (int b = 0; b < n_batch; b++) { - VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result); - // Update the pointers. - result += v_size; - batch_vector += v_size; - } -} - -// Same as above, but inputs are 16bit integer and output is 16bit integer. 
-inline void VectorBatchVectorCwiseProductAccumulate( - const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch, - int32_t multiplier, int shift, int16_t* result) { - PortableVectorBatchVectorCwiseProductAccumulate( - vector, v_size, batch_vector, n_batch, multiplier, shift, result); -} - -// Apply Rectified Linear to elements of a vector. -inline void ApplyReluToVector(const float* vector, int v_size, float* result) { - for (int v = 0; v < v_size; v++) { - result[v] = std::max(0.0f, vector[v]); - } -} - -// Apply Rectified Linear 1 (cap to [-1;1]) to elements of a vector -inline void ApplyRelu1ToVector(const float* vector, int v_size, float* result) { - for (int v = 0; v < v_size; v++) { - result[v] = std::max(-1.0f, std::min(vector[v], 1.0f)); - } -} - -// Apply Rectified Linear 6 (cap to [0;6]) to elements of a vector -inline void ApplyRelu6ToVector(const float* vector, int v_size, float* result) { - for (int v = 0; v < v_size; v++) { - result[v] = std::max(0.0f, std::min(vector[v], 6.0f)); - } -} - -// Apply tanh to elements of a vector -inline void ApplyTanhToVector(const float* vector, int v_size, float* result) { - for (int v = 0; v < v_size; v++) { - result[v] = std::tanh(vector[v]); - } -} - -// Apply signbit to elements of a vector -inline void ApplySignbitToVector(const float* vector, int v_size, - float* result) { - for (int v = 0; v < v_size; v++) { - result[v] = std::signbit(vector[v]); - } -} - -// Apply sigmoid to elements of a vector. -inline void ApplySigmoidToVector(const float* vector, int v_size, - float* result) { - for (int v = 0; v < v_size; v++) { - result[v] = 1.0f / (1.0f + std::exp(-vector[v])); - } -} - -// Apply appropriate activation function to elements of a vector. 
-inline void ApplyActivationToVector(const float* vector, int v_size, - TfLiteFusedActivation activation, - float* result) { - switch (activation) { - case kTfLiteActNone: - return; - case kTfLiteActRelu: - return ApplyReluToVector(vector, v_size, result); - case kTfLiteActReluN1To1: - return ApplyRelu1ToVector(vector, v_size, result); - case kTfLiteActRelu6: - return ApplyRelu6ToVector(vector, v_size, result); - case kTfLiteActTanh: - return ApplyTanhToVector(vector, v_size, result); - case kTfLiteActSignBit: - return ApplySignbitToVector(vector, v_size, result); - case kTfLiteActSigmoid: - return ApplySigmoidToVector(vector, v_size, result); - } -} - -} // namespace micro_tensor_utils - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/micro_utils.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/micro_utils.h deleted file mode 100644 index e406ac12..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/micro_utils.h +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_ -namespace tflite { -namespace ops { -namespace micro { - -// Same as gtl::Greater but defined here to reduce dependencies and -// binary size for micro environment. -struct Greater { - template - bool operator()(const T& x, const T& y) const { - return x > y; - } -}; - -struct Less { - template - bool operator()(const T& x, const T& y) const { - return x < y; - } -}; - -} // namespace micro -} // namespace ops -} // namespace tflite -#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/mirror_pad.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/mirror_pad.cc deleted file mode 100644 index 90d3bd9e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/mirror_pad.cc +++ /dev/null @@ -1,215 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { -namespace { - -struct OpDataMirrorPad { - int input_dims; - int output_size; - int offset; - int output_dims_num_elements_buffer_index; - int input_dims_num_elements_buffer_index; -}; - -// Helper method that fills the left and right pads. -template -inline void GetPadding(const T* data, int offset, int64_t* left_pad, - int64_t* right_pad) { - *left_pad = static_cast(*(data + offset * 2)); - *right_pad = static_cast(*(data + offset * 2 + 1)); -} - -// Given dimension index and the left/right padding. -// Returns the corresponding dimension in the input array. -inline int GetInputDimension(int padded_dimension, int left_pad, int right_pad, - int input_dim_size, int offset) { - if (padded_dimension < left_pad) { - const int original_ind = left_pad + offset - 1; - return original_ind - (std::min(padded_dimension, original_ind - offset)); - } - padded_dimension -= left_pad; - if (padded_dimension >= input_dim_size) { - padded_dimension -= input_dim_size; - const int original_ind = input_dim_size - (1 + offset); - return original_ind - std::min(padded_dimension, original_ind); - } - return padded_dimension; -} - -// Given and index in output array, returns the index of the value -// in input array. 
-int GetFlatIndex(int index, int num_dims, - const TfLiteEvalTensor* padding_matrix, - const TfLiteIntArray* input_dims, - int* output_dims_num_elements, int* input_dims_num_elements, - const int offset) { - int flat_index = 0; - int64_t left_pad = 0, right_pad = 0, dimension_index, index_in_input; - - for (int i = 0; i < num_dims; ++i) { - switch (padding_matrix->type) { - case kTfLiteInt32: - GetPadding(padding_matrix->data.i32, i, &left_pad, &right_pad); - break; - case kTfLiteInt64: - GetPadding(padding_matrix->data.i64, i, &left_pad, &right_pad); - break; - default: - break; - } - dimension_index = index / output_dims_num_elements[i]; - - index_in_input = GetInputDimension(dimension_index, left_pad, right_pad, - input_dims->data[i], offset); - - flat_index += index_in_input * (input_dims_num_elements)[i]; - index %= output_dims_num_elements[i]; - } - - return flat_index; -} - -template -void MirrorPad(const TfLiteEvalTensor* padding_matrix, - const TfLiteIntArray* input_dims, int* output_dims_num_elements, - int* input_dims_num_elements, const T* input_data, - T* output_data, const int offset, const int num_dims, - const int output_size) { - for (int i = 0; i < output_size; ++i) { - output_data[i] = input_data[GetFlatIndex( - i, num_dims, padding_matrix, input_dims, output_dims_num_elements, - input_dims_num_elements, offset)]; - } -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - TfLiteStatus status = kTfLiteOk; - const OpDataMirrorPad* data = - static_cast(node->user_data); - - const TfLiteEvalTensor* input_tensor = - tflite::micro::GetEvalInput(context, node, 0); - const TfLiteEvalTensor* padding_matrix = - tflite::micro::GetEvalInput(context, node, 1); - - TfLiteEvalTensor* output_tensor = - tflite::micro::GetEvalOutput(context, node, 0); - const int input_dims = data->input_dims; - const int output_size = data->output_size; - - int* input_dims_num_elements = (int*)context->GetScratchBuffer( 
- context, data->input_dims_num_elements_buffer_index); - int* output_dims_num_elements = (int*)context->GetScratchBuffer( - context, data->output_dims_num_elements_buffer_index); - - for (int i = 0; i < input_dims; i++) { - output_dims_num_elements[i] = 1; - input_dims_num_elements[i] = 1; - } - - for (int i = input_dims - 2; i >= 0; i--) { - output_dims_num_elements[i] = - output_dims_num_elements[i + 1] * output_tensor->dims->data[i + 1]; - - input_dims_num_elements[i] = - input_dims_num_elements[i + 1] * input_tensor->dims->data[i + 1]; - } - - switch (output_tensor->type) { - case kTfLiteFloat32: { - MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements, - input_dims_num_elements, - tflite::micro::GetTensorData(input_tensor), - tflite::micro::GetTensorData(output_tensor), - data->offset, input_dims, output_size); - break; - } - case kTfLiteInt8: { - MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements, - input_dims_num_elements, - tflite::micro::GetTensorData(input_tensor), - tflite::micro::GetTensorData(output_tensor), - data->offset, input_dims, output_size); - break; - } - default: - status = kTfLiteError; - break; - } - -#undef TF_LITE_MIRROR_PAD - - return status; -} - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataMirrorPad)); -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TFLITE_DCHECK(node->user_data != nullptr); - OpDataMirrorPad* data = static_cast(node->user_data); - - TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0); - TfLiteTensor* padding_matrix = - micro_context->AllocateTempInputTensor(node, 1); - TfLiteTensor* output_tensor = - micro_context->AllocateTempOutputTensor(node, 0); - - TF_LITE_ENSURE_EQ(context, NumDimensions(padding_matrix), 2); - 
TF_LITE_ENSURE_EQ(context, SizeOfDimension(padding_matrix, 0), - NumDimensions(input_tensor)); - auto* params = - reinterpret_cast(node->builtin_data); - if (params == nullptr) { - return kTfLiteError; - } - - data->offset = - params->mode != TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect ? 0 - : 1; - data->input_dims = NumDimensions(input_tensor); - data->output_size = NumElements(output_tensor); - - TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( - context, data->input_dims * sizeof(int), - &data->output_dims_num_elements_buffer_index)); - TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( - context, data->input_dims * sizeof(int), - &data->input_dims_num_elements_buffer_index)); - - micro_context->DeallocateTempTfLiteTensor(input_tensor); - micro_context->DeallocateTempTfLiteTensor(padding_matrix); - micro_context->DeallocateTempTfLiteTensor(output_tensor); - return kTfLiteOk; -} - -} // namespace - -TfLiteRegistration Register_MIRROR_PAD() { - return tflite::micro::RegisterOp(Init, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/mul.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/mul.cc deleted file mode 100644 index 59f006b0..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/mul.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/kernels/mul.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" -#include "tensorflow/lite/kernels/internal/reference/mul.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->builtin_data != nullptr); - auto* params = reinterpret_cast(node->builtin_data); - - TFLITE_DCHECK(node->user_data != nullptr); - const OpDataMul* data = static_cast(node->user_data); - - const TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, kMulInput1Tensor); - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, kMulInput2Tensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kMulOutputTensor); - - switch (input1->type) { - case kTfLiteInt8: - case kTfLiteInt32: - EvalMulQuantizedReference(context, node, data, input1, input2, output); - break; - case kTfLiteFloat32: - EvalMulFloatReference(context, node, params, data, input1, input2, - output); - break; - default: - MicroPrintf("Type %s (%d) not supported.", - TfLiteTypeGetName(input1->type), input1->type); - return kTfLiteError; - } - - return kTfLiteOk; -} - -TfLiteRegistration Register_MUL() { - return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/mul_common.cc 
b/code/components/tflite-lib/tensorflow/lite/micro/kernels/mul_common.cc deleted file mode 100644 index 6d19ac7a..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/mul_common.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" -#include "tensorflow/lite/kernels/internal/reference/mul.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/mul.h" -#include "tensorflow/lite/micro/memory_helpers.h" - -namespace tflite { - -const int kMulInput1Tensor = 0; -const int kMulInput2Tensor = 1; -const int kMulOutputTensor = 0; - -void* MulInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataMul)); -} - -TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node, - TfLiteMulParams* params, OpDataMul* data) { - MicroContext* micro_context = 
GetMicroContext(context); - - TfLiteTensor* input1 = - micro_context->AllocateTempInputTensor(node, kMulInput1Tensor); - TF_LITE_ENSURE(context, input1 != nullptr); - TfLiteTensor* input2 = - micro_context->AllocateTempInputTensor(node, kMulInput2Tensor); - TF_LITE_ENSURE(context, input2 != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kMulOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); - - if (output->type == kTfLiteInt8) { - TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( - context, params->activation, output, &data->output_activation_min, - &data->output_activation_max)); - - double real_multiplier = static_cast(input1->params.scale) * - static_cast(input2->params.scale) / - static_cast(output->params.scale); - QuantizeMultiplier(real_multiplier, &data->output_multiplier, - &data->output_shift); - - data->input1_zero_point = input1->params.zero_point; - data->input2_zero_point = input2->params.zero_point; - data->output_zero_point = output->params.zero_point; - } else if (output->type == kTfLiteInt32) { - CalculateActivationRange(params->activation, &data->output_activation_min, - &data->output_activation_max); - } else { - CalculateActivationRange(params->activation, - &data->output_activation_min_f32, - &data->output_activation_max_f32); - } - - micro_context->DeallocateTempTfLiteTensor(input1); - micro_context->DeallocateTempTfLiteTensor(input2); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -TfLiteStatus MulPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->builtin_data != nullptr); - auto* params = reinterpret_cast(node->builtin_data); - - TFLITE_DCHECK(node->user_data != nullptr); - OpDataMul* data = static_cast(node->user_data); - - return CalculateOpDataMul(context, node, 
params, data); -} - -void EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node, - const OpDataMul* data, - const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, - TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params = {}; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; - op_params.float_activation_max = data->output_activation_max_f32; - op_params.input1_offset = -data->input1_zero_point; - op_params.input2_offset = -data->input2_zero_point; - op_params.output_offset = data->output_zero_point; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; - - bool need_broadcast = reference_ops::ProcessBroadcastShapes( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorShape(input2), &op_params); - - if (input1->type == kTfLiteInt8) { - if (need_broadcast) { - reference_integer_ops::BroadcastMul4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - reference_integer_ops::Mul(op_params, - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } - } else if (input1->type == kTfLiteInt32) { - if (need_broadcast) { - reference_ops::BroadcastMul4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1), - 
tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } - } -} - -void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node, - TfLiteMulParams* params, const OpDataMul* data, - const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, - TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params = {}; - op_params.float_activation_min = data->output_activation_min_f32; - op_params.float_activation_max = data->output_activation_max_f32; - - bool need_broadcast = reference_ops::ProcessBroadcastShapes( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorShape(input2), &op_params); - - if (need_broadcast) { - reference_ops::BroadcastMul4DSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/pooling_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/pooling_common.cc deleted file mode 100644 index ddc18f0b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/pooling_common.cc +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" -#include "tensorflow/lite/kernels/internal/reference/pooling.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/pooling.h" - -namespace tflite { - -const int kPoolingInputTensor = 0; -const int kPoolingOutputTensor = 0; - -TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context, - const TfLitePoolParams* params, - const TfLiteTensor* input, - const TfLiteTensor* output, - OpDataPooling* data) { - // input: batch, height, width, channel - int height = SizeOfDimension(input, 1); - int width = SizeOfDimension(input, 2); - - int out_height, out_width; - - data->padding = ComputePaddingHeightWidth( - params->stride_height, params->stride_width, - /*dilation_rate_height=*/1, - /*dilation_rate_width=*/1, height, width, params->filter_height, - params->filter_width, params->padding, &out_height, &out_width); - - return kTfLiteOk; -} - -TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->builtin_data != nullptr); - auto* params = reinterpret_cast(node->builtin_data); - - TFLITE_DCHECK(node->user_data != nullptr); - OpDataPooling* data = static_cast(node->user_data); - - MicroContext* micro_context = 
GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kPoolingInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE_STATUS( - CalculateOpDataPooling(context, params, input, output, data)); - - if (input->type == kTfLiteFloat32) { - CalculateActivationRange(params->activation, &data->activation_min_f32, - &data->activation_max_f32); - } else if (input->type == kTfLiteInt8) { - CalculateActivationRangeQuantized(context, params->activation, output, - &data->activation_min, - &data->activation_max); - } - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - - return kTfLiteOk; -} - -void AveragePoolingEvalFloat(const TfLiteContext* context, - const TfLiteNode* node, - const TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.float_activation_min = data->activation_min_f32; - op_params.float_activation_max = data->activation_max_f32; - reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -} - -void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - TFLITE_DCHECK(input->type == kTfLiteInt8); - - PoolParams 
op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::AveragePool( - op_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -} - -void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - tflite::PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.float_activation_min = data->activation_min_f32; - op_params.float_activation_max = data->activation_max_f32; - reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -} - -void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, - const OpDataPooling* data, - const TfLiteEvalTensor* input, - TfLiteEvalTensor* output) { - tflite::PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; 
- op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->activation_min; - op_params.quantized_activation_max = data->activation_max; - - reference_integer_ops::MaxPool(op_params, - tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/prelu.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/prelu.h deleted file mode 100644 index 571d1e88..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/prelu.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -TfLiteStatus CalculatePreluParams(const TfLiteTensor* input, - const TfLiteTensor* alpha, - TfLiteTensor* output, PreluParams* params); - -void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape, - const float* input1_data, - const RuntimeShape& unextended_input2_shape, - const float* input2_data, - const RuntimeShape& unextended_output_shape, - float* output_data); - -TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/prelu_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/prelu_common.cc deleted file mode 100644 index 1a89cadf..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/prelu_common.cc +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/prelu.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/prelu.h" - -namespace tflite { - -TfLiteStatus CalculatePreluParams(const TfLiteTensor* input, - const TfLiteTensor* alpha, - TfLiteTensor* output, PreluParams* params) { - if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { - double real_multiplier_1 = static_cast(input->params.scale) / - static_cast(output->params.scale); - double real_multiplier_2 = static_cast(input->params.scale) * - static_cast(alpha->params.scale) / - static_cast(output->params.scale); - QuantizeMultiplier(real_multiplier_1, ¶ms->output_multiplier_1, - ¶ms->output_shift_1); - QuantizeMultiplier(real_multiplier_2, ¶ms->output_multiplier_2, - ¶ms->output_shift_2); - - params->input_offset = -input->params.zero_point; - params->alpha_offset = -alpha->params.zero_point; - params->output_offset = output->params.zero_point; - } - - return kTfLiteOk; -} - -void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape, - const float* input1_data, - const RuntimeShape& unextended_input2_shape, - const float* input2_data, - const RuntimeShape& unextended_output_shape, - float* output_data) { - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, - 
unextended_input2_shape, &desc1, &desc2); - - for (int b = 0; b < output_shape.Dims(0); ++b) { - for (int y = 0; y < output_shape.Dims(1); ++y) { - for (int x = 0; x < output_shape.Dims(2); ++x) { - for (int c = 0; c < output_shape.Dims(3); ++c) { - auto out_idx = Offset(output_shape, b, y, x, c); - auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); - auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); - auto in1_val = input1_data[in1_idx]; - auto in2_val = input2_data[in2_idx]; - output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val; - } - } - } - } -} - -TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - PreluParams* params = static_cast(node->user_data); - - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* alpha = micro_context->AllocateTempInputTensor(node, 1); - TF_LITE_ENSURE(context, alpha != nullptr); - TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE_OK(context, - CalculatePreluParams(input, alpha, output, params)); - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(alpha); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize.cc deleted file mode 100644 index b5eb9c3c..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize.cc +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/kernels/quantize.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { -namespace { - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, - sizeof(OpDataQuantizeReference)); -} - -} // namespace - -TfLiteRegistration Register_QUANTIZE() { - return tflite::micro::RegisterOp(Init, PrepareQuantizeReference, - EvalQuantizeReference); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize.h deleted file mode 100644 index ba93809a..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -struct OpDataQuantizeReference { - tflite::QuantizationParams quantization_params; - // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. - int32_t requantize_output_multiplier; - int requantize_output_shift; - - int32_t input_zero_point; -}; - -TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node); -TfLiteStatus PrepareQuantizeReference(TfLiteContext* context, TfLiteNode* node); -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize_common.cc deleted file mode 100644 index 94220529..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/quantize_common.cc +++ /dev/null @@ -1,239 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/quantize.h" -#include "tensorflow/lite/kernels/internal/reference/requantize.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/quantize.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { - -TfLiteStatus PrepareQuantizeReference(TfLiteContext* context, - TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - auto* data = static_cast(node->user_data); - - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); - TF_LITE_ENSURE(context, output != nullptr); - - // TODO(b/128934713): Add support for fixed-point per-channel quantization. - // Currently this only support affine per-layer quantization. 
- TF_LITE_ENSURE_EQ(context, output->quantization.type, - kTfLiteAffineQuantization); - const auto* affine_quantization = - reinterpret_cast(output->quantization.params); - TF_LITE_ENSURE(context, affine_quantization); - TF_LITE_ENSURE(context, affine_quantization->scale); - TF_LITE_ENSURE(context, affine_quantization->scale->size == 1); - - TF_LITE_ENSURE( - context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt32 || - input->type == kTfLiteInt16 || input->type == kTfLiteInt8 || - input->type == kTfLiteUInt8); - TF_LITE_ENSURE(context, output->type == kTfLiteInt8 || - output->type == kTfLiteInt16 || - output->type == kTfLiteInt32 || - output->type == kTfLiteUInt8); - - if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) || - (input->type == kTfLiteInt8 && output->type == kTfLiteInt8) || - (input->type == kTfLiteInt8 && output->type == kTfLiteUInt8) || - (input->type == kTfLiteUInt8 && output->type == kTfLiteInt8) || - (input->type == kTfLiteInt8 && output->type == kTfLiteInt16) || - (input->type == kTfLiteInt8 && output->type == kTfLiteInt32) || - (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) || - (input->type == kTfLiteInt16 && output->type == kTfLiteInt32) || - (input->type == kTfLiteInt32 && output->type == kTfLiteInt8) || - (input->type == kTfLiteInt32 && output->type == kTfLiteInt16)) { - double effective_scale = static_cast(input->params.scale) / - static_cast(output->params.scale); - - QuantizeMultiplier(effective_scale, &data->requantize_output_multiplier, - &data->requantize_output_shift); - } - - data->quantization_params.zero_point = output->params.zero_point; - data->quantization_params.scale = static_cast(output->params.scale); - - data->input_zero_point = input->params.zero_point; - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) { - 
TFLITE_DCHECK(node->user_data != nullptr); - auto* data = static_cast(node->user_data); - - const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); - TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - - if (input->type == kTfLiteFloat32) { - switch (output->type) { - case kTfLiteInt8: - reference_ops::AffineQuantize( - data->quantization_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - break; - case kTfLiteInt16: - reference_ops::AffineQuantize( - data->quantization_params, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else if (input->type == kTfLiteInt32) { - size_t size = ElementCount(*input->dims); - switch (output->type) { - case kTfLiteInt8: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - break; - case kTfLiteInt16: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - break; - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else if (input->type == kTfLiteInt16) { - size_t size = ElementCount(*input->dims); - switch (output->type) { - case kTfLiteInt8: - reference_ops::Requantize( - 
tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - break; - case kTfLiteInt16: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - case kTfLiteInt32: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - return kTfLiteOk; - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else if (input->type == kTfLiteInt8) { - // Int8 to Int8 requantization, required if the input and output tensors - // have different scales and/or zero points. 
- size_t size = ElementCount(*input->dims); - switch (output->type) { - case kTfLiteInt8: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - break; - case kTfLiteUInt8: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - break; - case kTfLiteInt16: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - break; - case kTfLiteInt32: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - break; - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else if (input->type == kTfLiteUInt8) { - size_t size = ElementCount(*input->dims); - switch (output->type) { - case kTfLiteInt8: - reference_ops::Requantize( - tflite::micro::GetTensorData(input), size, - data->requantize_output_multiplier, data->requantize_output_shift, - data->input_zero_point, data->quantization_params.zero_point, - tflite::micro::GetTensorData(output)); - break; - default: - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - } else { - MicroPrintf("Input %s, output %s not supported.", - TfLiteTypeGetName(input->type), - 
TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/read_variable.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/read_variable.cc deleted file mode 100644 index 422c0384..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/read_variable.cc +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_graph.h" -#include "tensorflow/lite/micro/micro_resource_variable.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -namespace { - -constexpr int kInputVariableId = 0; -constexpr int kOutputValue = 0; - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(NumInputs(node) == 1); - TFLITE_DCHECK(NumOutputs(node) == 1); - - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input_resource_id_tensor = - micro_context->AllocateTempInputTensor(node, kInputVariableId); - - TFLITE_DCHECK(input_resource_id_tensor != nullptr); - TFLITE_DCHECK(input_resource_id_tensor->type == kTfLiteResource); - TFLITE_DCHECK(NumElements(input_resource_id_tensor) == 1); - - micro_context->DeallocateTempTfLiteTensor(input_resource_id_tensor); - - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input_resource_id_tensor = - tflite::micro::GetEvalInput(context, node, kInputVariableId); - TFLITE_DCHECK(input_resource_id_tensor != nullptr); - - TfLiteEvalTensor* output_value = - tflite::micro::GetEvalOutput(context, node, kOutputValue); - TFLITE_DCHECK(output_value != nullptr); - - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - MicroGraph& graph_info = micro_context->graph(); - - MicroResourceVariables* resources = graph_info.GetResourceVariables(); - if (resources == nullptr) { - MicroPrintf( - "READ_VARIABLE requires resource 
variables. Please create " - "ResourceVariables and pass it to the interpreter."); - return kTfLiteError; - } - TF_LITE_ENSURE_OK( - context, - resources->Read(input_resource_id_tensor->data.i32[0], output_value)); - return kTfLiteOk; -} - -} // namespace. - -TfLiteRegistration Register_READ_VARIABLE() { - return tflite::micro::RegisterOp(nullptr, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/reduce.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/reduce.cc deleted file mode 100644 index b4734f93..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/reduce.cc +++ /dev/null @@ -1,72 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/reduce.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/reduce.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { - -void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) { - return context->AllocatePersistentBuffer(context, sizeof(OpDataReduce)); -} - -TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) { - return PrepareMaxHelper(context, node, - static_cast(node->user_data)); -} - -TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) { - return PrepareMeanOrSumHelper(context, node, - static_cast(node->user_data)); -} - -TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) { - return EvalMeanHelper(context, node, - static_cast(node->user_data)); -} - -TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) { - OpDataReduce* op_data = static_cast(node->user_data); - return EvalMaxHelper(context, node, op_data); -} - -TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) { - return EvalSumHelper(context, node, - static_cast(node->user_data)); -} - -TfLiteRegistration Register_MEAN() { - return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalMean); -} - -TfLiteRegistration Register_REDUCE_MAX() { - return tflite::micro::RegisterOp(InitReduce, PrepareMax, EvalMax); -} - -TfLiteRegistration Register_SUM() { - return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalSum); -} 
- -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/reduce.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/reduce.h deleted file mode 100644 index 8d524069..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/reduce.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -extern const int kMaxNumberOfAxis; -extern const int kMaxNumberOfReducedAxis; - -struct OpDataReduce { - int32_t multiplier; - int shift; - int temp_buffer_idx; - int resolved_axis_idx; - int input_zp; - float input_scale; - int output_zp; - float output_scale; - int num_output_elements; -}; - -TfLiteStatus PrepareMaxHelper(TfLiteContext* context, TfLiteNode* node, - OpDataReduce* op_data); - -TfLiteStatus PrepareMeanOrSumHelper(TfLiteContext* context, TfLiteNode* node, - OpDataReduce* op_data); - -TfLiteStatus EvalMaxHelper(TfLiteContext* context, TfLiteNode* node, - OpDataReduce* op_data); -TfLiteStatus EvalMeanHelper(TfLiteContext* context, TfLiteNode* node, - OpDataReduce* op_data); -TfLiteStatus 
EvalSumHelper(TfLiteContext* context, TfLiteNode* node, - OpDataReduce* op_data); - -void ReduceResolveAxis(const int* axis_data, int axis_count, - MeanParams* op_params); - -TfLiteRegistration Register_MEAN(); -TfLiteRegistration Register_REDUCE_MAX(); -TfLiteRegistration Register_SUM(); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/reshape.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/reshape.cc deleted file mode 100644 index 832ba261..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/reshape.cc +++ /dev/null @@ -1,118 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { -namespace ops { -namespace micro { -namespace reshape { - -constexpr int kInputTensor = 0; -constexpr int kOutputTensor = 0; - -TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - // Tensorflow's Reshape allows one of the shape components to have the - // special -1 value, meaning it will be calculated automatically based on the - // input. Here we calculate what that dimension should be so that the number - // of output elements in the same as the number of input elements. - int num_input_elements = NumElements(input); - TfLiteIntArray* output_shape = output->dims; - - if (NumInputs(node) == 1 && // Legacy scalar supported with params. - output_shape->size == 1 && output_shape->data[0] == 0) { - // Legacy tflite models use a shape parameter of [0] to indicate scalars, - // so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during - // toco conversion. 
- output_shape->size = 0; - } - - int num_output_elements = 1; - int stretch_dim = -1; - for (int i = 0; i < output_shape->size; ++i) { - int value = output_shape->data[i]; - if (value == -1) { - TF_LITE_ENSURE_EQ(context, stretch_dim, -1); - stretch_dim = i; - } else { - num_output_elements *= value; - } - } - if (stretch_dim != -1) { - output_shape->data[stretch_dim] = num_input_elements / num_output_elements; - num_output_elements *= output_shape->data[stretch_dim]; - } - - TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); - TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk); - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); - - // TODO(b/162522304): storing input bytes in OpData increases some models - // significantly, possibly due to alignment issues. - size_t input_bytes; - TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes)); - input_bytes *= ElementCount(*input->dims); - - // Do nothing for in-place reshape. - if (input->data.raw != output->data.raw) { - // Otherwise perform reshape with copy. 
- memcpy(output->data.raw, input->data.raw, input_bytes); - } - return kTfLiteOk; -} - -} // namespace reshape - -TfLiteRegistration Register_RESHAPE() { - return tflite::micro::RegisterOp(nullptr, reshape::Prepare, reshape::Eval); -} - -} // namespace micro -} // namespace ops -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/round.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/round.cc deleted file mode 100644 index 0bda8783..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/round.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/kernels/internal/reference/round.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" - -namespace tflite { -namespace ops { -namespace micro { -namespace round { - -constexpr int kInputTensor = 0; -constexpr int kOutputTensor = 0; - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); - TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type); - TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes); - TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size); - for (int i = 0; i < output->dims->size; ++i) { - TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]); - } - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); - - reference_ops::Round(tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - - return kTfLiteOk; -} -} // namespace round - 
-TfLiteRegistration Register_ROUND() { - return tflite::micro::RegisterOp(nullptr, round::Prepare, round::Eval); -} - -} // namespace micro -} // namespace ops -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/softmax.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/softmax.h deleted file mode 100644 index 7096d202..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/softmax.h +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" - -namespace tflite { - -void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length); - -// Common helper function to SoftmaxPrepare. -TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, - const TfLiteTensor* input, - TfLiteTensor* output, - const TfLiteSoftmaxParams* params, - SoftmaxParams* op_data); - -TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node); - -// This is the most generic TfLiteRegistration. The actual supported types may -// still be target dependent. 
The only requirement is that every implementation -// (reference or optimized) must define this function. -TfLiteRegistration Register_SOFTMAX(); - -#if defined(XTENSA) || defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int8 input and int16 output. -TfLiteRegistration Register_SOFTMAX_INT8_INT16(); -#else -inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() { - return Register_SOFTMAX(); -} -#endif - -#if defined(CMSIS_NN) -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int8 input/output and uses the latency optimized implementations. -TfLiteRegistration Register_SOFTMAX_INT8(); - -// Returns a TfLiteRegistration struct for kernel variant that only supports -// int16 input/output and uses the latency optimized implementations. -TfLiteRegistration Register_SOFTMAX_INT16(); - -#else -inline TfLiteRegistration Register_SOFTMAX_INT8() { return Register_SOFTMAX(); } - -inline TfLiteRegistration Register_SOFTMAX_INT16() { - return Register_SOFTMAX(); -} -#endif - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/softmax_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/softmax_common.cc deleted file mode 100644 index b5378dae..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/softmax_common.cc +++ /dev/null @@ -1,162 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/softmax.h" -#include "tensorflow/lite/micro/micro_context.h" - -namespace tflite { - -namespace { -// Softmax parameter data that persists in user_data -const int kInt16LUTArraySize = 513; - -TfLiteStatus InitializeLutForInt16(TfLiteContext* context, - const TfLiteTensor* input, - TfLiteTensor* output, - SoftmaxParams* op_data) { - // Only allocate LUTs for KTfLiteInt16 data type - if (input->type == kTfLiteInt16) { - void* raw_exp_lut = context->AllocatePersistentBuffer( - context, sizeof(int16_t) * kInt16LUTArraySize); - TF_LITE_ENSURE(context, raw_exp_lut != nullptr); - op_data->exp_lut = reinterpret_cast(raw_exp_lut); - void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer( - context, sizeof(int16_t) * kInt16LUTArraySize); - TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr); - op_data->one_over_one_plus_x_lut = - reinterpret_cast(one_over_one_plus_x_lut); - } - - if (output->type == kTfLiteInt16) { - TF_LITE_ENSURE(context, - input->type == kTfLiteInt8 || input->type == kTfLiteInt16); - } else { - TF_LITE_ENSURE_EQ(context, input->type, output->type); - } - - // Populate LUT if required - if (input->type == kTfLiteInt16) { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); - // exp LUT only used on negative values - // we consider exp(-10.0) is insignificant to accumulation - gen_lut( - [](float value) { return std::exp(value); }, -10.0f, 0.0f, -1.0f, 1.0f, - op_data->exp_lut); - gen_lut( - [](float value) { 
return 1.0f / (1.0f + value); }, 0.0f, 1.0f, -1.0f, - 1.0f, op_data->one_over_one_plus_x_lut); - op_data->zero_point = output->params.zero_point; - op_data->scale = output->params.scale; - } - - return kTfLiteOk; -} - -} // namespace - -TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, - const TfLiteTensor* input, - TfLiteTensor* output, - const TfLiteSoftmaxParams* params, - SoftmaxParams* op_data) { - if (InitializeLutForInt16(context, input, output, op_data) != kTfLiteOk) { - return kTfLiteError; - } - - if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) { - if (input->type == kTfLiteInt16) { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); - TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768, - (0.001f * 1.f / 32768)); - } else { // input->type == kTfLiteInt8 - TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8); - if (output->type == kTfLiteInt16) { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768); - TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536, - (0.001f * 1.f / 65536)); - } else { // output->type == kTfLiteint8 - TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); - TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128); - TF_LITE_ENSURE(context, output->params.scale == 1.f / 256); - } - } - - static const int kScaledDiffIntegerBits = 5; - - // Calculate input_multiplier and input_left_shift - if (input->type == kTfLiteInt16) { - int input_left_shift; - double input_scale_beta_rescale = - static_cast(input->params.scale) * - static_cast(params->beta) / - (10.0 / 65535.0); // scale the input_diff such that [-65535, 0] - // correspond to [-10.0, 0.0] - QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier, - &input_left_shift); - op_data->input_left_shift = input_left_shift; - } else { - int input_left_shift; - tflite::PreprocessSoftmaxScaling( - static_cast(params->beta), - static_cast(input->params.scale), kScaledDiffIntegerBits, - 
&op_data->input_multiplier, &input_left_shift); - op_data->input_left_shift = input_left_shift; - op_data->diff_min = - -1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits, - op_data->input_left_shift); - } - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); - TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32); - op_data->beta = static_cast(params->beta); - } - return kTfLiteOk; -} - -void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams)); -} - -TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0); - TF_LITE_ENSURE(context, input != nullptr); - TF_LITE_ENSURE(context, NumDimensions(input) >= 1); - TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE(context, node->user_data != nullptr); - SoftmaxParams* op_data = static_cast(node->user_data); - - auto* params = static_cast(node->builtin_data); - auto ret_val = - CalculateSoftmaxParams(context, input, output, params, op_data); - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(output); - return ret_val; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/split.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/split.cc deleted file mode 100644 index 06584d45..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/split.cc +++ /dev/null @@ -1,128 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { -namespace ops { -namespace micro { -namespace split { - -template -TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, - const TfLiteEvalTensor* input, int axis_value) { - const int output_count = NumOutputs(node); - const TfLiteIntArray* input_dims = input->dims; - const TfLiteEvalTensor* output0 = - tflite::micro::GetEvalOutput(context, node, 0); - const TfLiteIntArray* output_dims = output0->dims; - - const int split_dimensions = input_dims->size; - int axis = axis_value < 0 ? 
axis_value + split_dimensions : axis_value; - - TFLITE_DCHECK_LT(axis, split_dimensions); - TFLITE_DCHECK_EQ(output_dims->size, split_dimensions); - - int64_t split_size = output_dims->data[axis] * output_count; - - TFLITE_DCHECK_EQ(split_size, input_dims->data[axis]); - int64_t outer_size = 1; - for (int i = 0; i < axis; ++i) { - outer_size *= input_dims->data[i]; - } - - int64_t base_inner_size = 1; - for (int i = axis + 1; i < split_dimensions; ++i) { - base_inner_size *= input_dims->data[i]; - } - - const T* input_ptr = tflite::micro::GetTensorData(input); - for (int k = 0; k < outer_size; ++k) { - for (int i = 0; i < output_count; ++i) { - TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i); - T* output_data = tflite::micro::GetTensorData(t); - const int copy_size = output_dims->data[axis] * base_inner_size; - T* output_ptr = output_data + k * copy_size; - for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j]; - input_ptr += copy_size; - } - } - - return kTfLiteOk; -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 0); - TF_LITE_ENSURE(context, axis != nullptr); - - // Dynamic output tensors are needed if axis tensor is not constant. - // But Micro doesn't support dynamic memory allocation, so we only support - // constant axis tensor for now. 
- TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis), - "Non constant axis tensor not supported"); - - micro_context->DeallocateTempTfLiteTensor(axis); - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0); - const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1); - - int axis_value = tflite::micro::GetTensorData(axis)[0]; - if (axis_value < 0) { - axis_value += input->dims->size; - } - - TF_LITE_ENSURE(context, axis_value >= 0); - TF_LITE_ENSURE(context, axis_value < input->dims->size); - - switch (input->type) { - case kTfLiteFloat32: { - return SplitImpl(context, node, input, axis_value); - } - case kTfLiteInt8: { - return SplitImpl(context, node, input, axis_value); - } - case kTfLiteInt16: { - return SplitImpl(context, node, input, axis_value); - } - case kTfLiteInt32: { - return SplitImpl(context, node, input, axis_value); - } - default: - MicroPrintf("Type %s currently not supported.", - TfLiteTypeGetName(input->type)); - return kTfLiteError; - } - - return kTfLiteOk; -} - -} // namespace split - -TfLiteRegistration Register_SPLIT() { - return tflite::micro::RegisterOp(nullptr, split::Prepare, split::Eval); -} - -} // namespace micro -} // namespace ops -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/squared_difference.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/squared_difference.cc deleted file mode 100644 index ca924e26..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/squared_difference.cc +++ /dev/null @@ -1,247 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/binary_function.h" -#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_context.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { -namespace { -constexpr int kInputTensor1 = 0; -constexpr int kInputTensor2 = 1; -constexpr int kOutputTensor = 0; - -struct OpData { - bool requires_broadcast; - ArithmeticParams arithmetic_params; -}; - -template -T SquaredDifference(T input1, T input2) { - const T difference = input1 - input2; - return difference * difference; -} - -void* SquaredDifferenceInit(TfLiteContext* context, const char* buffer, - size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpData)); -} - -TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context, - TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - OpData* data = reinterpret_cast(node->user_data); - data->requires_broadcast = false; - - TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input1 = - micro_context->AllocateTempInputTensor(node, 
kInputTensor1); - TF_LITE_ENSURE(context, input1 != nullptr); - TfLiteTensor* input2 = - micro_context->AllocateTempInputTensor(node, kInputTensor2); - TF_LITE_ENSURE(context, input2 != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); - output->type = input2->type; - - // Ensure the quantization parameters are equivalent. - if (input1->type == kTfLiteInt8) { - const auto& input1_quantization_params = input1->params; - const auto& input2_quantization_params = input2->params; - const auto& output_quantization_params = output->params; - const int32_t integer_type_min = std::numeric_limits::min(); - const int32_t integer_type_max = std::numeric_limits::max(); - TF_LITE_ENSURE(context, - input1_quantization_params.zero_point >= integer_type_min); - TF_LITE_ENSURE(context, - input1_quantization_params.zero_point <= integer_type_max); - TF_LITE_ENSURE(context, - input2_quantization_params.zero_point >= integer_type_min); - TF_LITE_ENSURE(context, - input2_quantization_params.zero_point <= integer_type_max); - TF_LITE_ENSURE(context, - output_quantization_params.zero_point >= integer_type_min); - TF_LITE_ENSURE(context, - output_quantization_params.zero_point <= integer_type_max); - data->arithmetic_params.input1_offset = - -input1_quantization_params.zero_point; - data->arithmetic_params.input2_offset = - -input2_quantization_params.zero_point; - data->arithmetic_params.output_offset = - output_quantization_params.zero_point; - - // shift to make integer for scales. 
- // 7 is selected so that maximum shifted result 255^2 * (1 << (7 * 2 )) - // does not overflow signed 32-bit integer - data->arithmetic_params.left_shift = 7; - const double twice_max_input_scale = - 2.0 * static_cast(std::max(input1_quantization_params.scale, - input2_quantization_params.scale)); - const double real_input1_multiplier = - static_cast(input1_quantization_params.scale) / - twice_max_input_scale; - double real_input2_multiplier = - static_cast(input2_quantization_params.scale) / - twice_max_input_scale; - const double real_output_multiplier = - (twice_max_input_scale * twice_max_input_scale) / - static_cast((1 << data->arithmetic_params.left_shift * 2) * - output_quantization_params.scale); - QuantizeMultiplierSmallerThanOneExp( - real_input1_multiplier, &data->arithmetic_params.input1_multiplier, - &data->arithmetic_params.input1_shift); - QuantizeMultiplierSmallerThanOneExp( - real_input2_multiplier, &data->arithmetic_params.input2_multiplier, - &data->arithmetic_params.input2_shift); - QuantizeMultiplierSmallerThanOneExp( - real_output_multiplier, &data->arithmetic_params.output_multiplier, - &data->arithmetic_params.output_shift); - data->arithmetic_params.quantized_activation_min = - std::numeric_limits::min(); - data->arithmetic_params.quantized_activation_max = - std::numeric_limits::max(); - } - - data->requires_broadcast = !HaveSameShapes(input1, input2); - - micro_context->DeallocateTempTfLiteTensor(input1); - micro_context->DeallocateTempTfLiteTensor(input2); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -inline int8_t SquaredDifference(int8_t x, int8_t y, - const ArithmeticParams& params) { - const int32_t input1_val = params.input1_offset + x; - const int32_t input2_val = params.input2_offset + y; - const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); - const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); - const int32_t scaled_input1_val = - 
MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32_t scaled_input2_val = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32_t raw_diff = scaled_input1_val - scaled_input2_val; - - // Max of this is 255^2 * (1 << 14), so won't overflow 32 bits. - const int32_t squared_raw_diff = raw_diff * raw_diff; - const int32_t raw_output = - MultiplyByQuantizedMultiplierSmallerThanOneExp( - squared_raw_diff, params.output_multiplier, params.output_shift) + - params.output_offset; - const int32_t clamped_output = - std::min(params.quantized_activation_max, - std::max(params.quantized_activation_min, raw_output)); - return static_cast(clamped_output); -} - -template -void EvalQuantizedSquaredDifference(TfLiteContext* context, TfLiteNode* node, - const OpData* data, - const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, - TfLiteEvalTensor* output) { - const auto* op_data = static_cast(node->user_data); - if (data->requires_broadcast) { - reference_integer_ops::BroadcastBinaryFunction4DSlow( - op_data->arithmetic_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output), - reference_integer_ops::CheckArithmeticParams, SquaredDifference); - } else { - const int flat_size = tflite::micro::GetTensorShape(input1).FlatSize(); - reference_integer_ops::ElementWise( - flat_size, op_data->arithmetic_params, - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorData(output), - reference_integer_ops::CheckArithmeticParams, SquaredDifference); - } -} - -template -void EvalSquaredDifference(TfLiteContext* context, TfLiteNode* node, - const OpData* data, const TfLiteEvalTensor* 
input1, - const TfLiteEvalTensor* input2, - TfLiteEvalTensor* output) { - if (data->requires_broadcast) { - reference_ops::BroadcastBinaryFunction4DSlow( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output), SquaredDifference); - } else { - reference_ops::BinaryFunction( - tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output), SquaredDifference); - } -} - -TfLiteStatus SquaredDifferenceEval(TfLiteContext* context, TfLiteNode* node) { - OpData* data = reinterpret_cast(node->user_data); - - const TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, kInputTensor1); - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, kInputTensor2); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kOutputTensor); - - if (output->type == kTfLiteFloat32) { - EvalSquaredDifference(context, node, data, input1, input2, output); - } else if (output->type == kTfLiteInt32) { - EvalSquaredDifference(context, node, data, input1, input2, output); - } else if (output->type == kTfLiteInt8) { - EvalQuantizedSquaredDifference(context, node, data, input1, input2, - output); - } else { - MicroPrintf( - "SquaredDifference only supports FLOAT32, INT32 and INT8 now, got %d.", - output->type); - return kTfLiteError; - } - - return kTfLiteOk; -} -} // namespace - -TfLiteRegistration Register_SQUARED_DIFFERENCE() { - return tflite::micro::RegisterOp( - SquaredDifferenceInit, SquaredDifferencePrepare, SquaredDifferenceEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub.cc 
b/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub.cc deleted file mode 100644 index 40bddbad..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub.cc +++ /dev/null @@ -1,168 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/kernels/sub.h" - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/add.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/reference/sub.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -void* SubInit(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataSub)); -} - -void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* 
params, - const OpDataSub* data, const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { - float output_activation_min, output_activation_max; - CalculateActivationRange(params->activation, &output_activation_min, - &output_activation_max); - tflite::ArithmeticParams op_params; - SetActivationParams(output_activation_min, output_activation_max, &op_params); - if (data->requires_broadcast) { - tflite::reference_ops::BroadcastSubSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - tflite::reference_ops::SubWithActivation( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } -} - -TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteSubParams* params, const OpDataSub* data, - const TfLiteEvalTensor* input1, - const TfLiteEvalTensor* input2, - TfLiteEvalTensor* output) { - tflite::ArithmeticParams op_params; - op_params.left_shift = data->left_shift; - op_params.input1_offset = data->input1_offset; - op_params.input1_multiplier = data->input1_multiplier; - op_params.input1_shift = data->input1_shift; - op_params.input2_offset = data->input2_offset; - op_params.input2_multiplier = data->input2_multiplier; - op_params.input2_shift = data->input2_shift; - op_params.output_offset = data->output_offset; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; - SetActivationParams(data->output_activation_min, data->output_activation_max, - &op_params); - bool need_broadcast = reference_ops::ProcessBroadcastShapes( - 
tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorShape(input2), &op_params); - - switch (output->type) { - case kTfLiteInt8: { - if (need_broadcast) { - tflite::reference_ops::BroadcastQuantSubSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - tflite::reference_ops::Sub( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } - break; - } - case kTfLiteInt16: { - if (need_broadcast) { - tflite::reference_ops::BroadcastQuantSubSlow( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } else { - tflite::reference_ops::Sub( - op_params, tflite::micro::GetTensorShape(input1), - tflite::micro::GetTensorData(input1), - tflite::micro::GetTensorShape(input2), - tflite::micro::GetTensorData(input2), - tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); - } - break; - } - default: - MicroPrintf("Quantized type %s not currently supported.", - TfLiteTypeGetName(output->type)); - return kTfLiteError; - } - return kTfLiteOk; -} - -TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->builtin_data); - - const TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, kSubInputTensor1); - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, kSubInputTensor2); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, 
node, kSubOutputTensor); - - TFLITE_DCHECK(node->user_data != nullptr); - const OpDataSub& data = *(static_cast(node->user_data)); - - if (output->type == kTfLiteFloat32) { - EvalSub(context, node, params, &data, input1, input2, output); - } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { - TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data, - input1, input2, output)); - } else { - MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type), - output->type); - return kTfLiteError; - } - - return kTfLiteOk; -} - -TfLiteRegistration Register_SUB() { - return tflite::micro::RegisterOp(SubInit, SubPrepare, SubEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub.h b/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub.h deleted file mode 100644 index 29900221..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub.h +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -extern const int kSubInputTensor1; -extern const int kSubInputTensor2; -extern const int kSubOutputTensor; - -struct OpDataSub { - bool requires_broadcast; - - // These fields are used in both the general 8-bit -> 8bit quantized path, - // and the special 16-bit -> 16bit quantized path - int input1_shift; - int input2_shift; - int32_t output_activation_min; - int32_t output_activation_max; - - // These fields are used only in the general 8-bit -> 8bit quantized path - int32_t input1_multiplier; - int32_t input2_multiplier; - int32_t output_multiplier; - int output_shift; - int left_shift; - int32_t input1_offset; - int32_t input2_offset; - int32_t output_offset; -}; - -TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteTensor* output, OpDataSub* data); - -TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub_common.cc deleted file mode 100644 index d6647462..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/sub_common.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/reference/add.h" -#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" -#include "tensorflow/lite/kernels/internal/reference/sub.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/sub.h" - -namespace tflite { - -const int kSubInputTensor1 = 0; -const int kSubInputTensor2 = 1; -const int kSubOutputTensor = 0; - -TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteTensor* output, OpDataSub* data) { - data->requires_broadcast = !HaveSameShapes(input1, input2); - - if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) { - // 8bit -> 8bit general quantized path, with general rescalings - data->input1_offset = -input1->params.zero_point; - data->input2_offset = -input2->params.zero_point; - data->output_offset = output->params.zero_point; - - // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit, - // accordingly. 
In case of 16-bit we have 65535 << 15 which is less than 1 - // << 31, therefore the addition will still fit in a 32 bit accumulator. - data->left_shift = output->type == kTfLiteInt16 ? 15 : 20; - const float twice_max_input_scale = - 2 * std::max(input1->params.scale, input2->params.scale); - const double real_input1_multiplier = - static_cast(input1->params.scale / twice_max_input_scale); - const double real_input2_multiplier = - static_cast(input2->params.scale / twice_max_input_scale); - const double real_output_multiplier = - static_cast(twice_max_input_scale / - ((1 << data->left_shift) * output->params.scale)); - - QuantizeMultiplierSmallerThanOneExp( - real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); - - QuantizeMultiplierSmallerThanOneExp( - real_input2_multiplier, &data->input2_multiplier, &data->input2_shift); - - QuantizeMultiplierSmallerThanOneExp( - real_output_multiplier, &data->output_multiplier, &data->output_shift); - - TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( - context, params->activation, output, &data->output_activation_min, - &data->output_activation_max)); - } - - return kTfLiteOk; -} - -TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->user_data != nullptr); - TFLITE_DCHECK(node->builtin_data != nullptr); - - OpDataSub* data = static_cast(node->user_data); - auto* params = reinterpret_cast(node->builtin_data); - - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input1 = - micro_context->AllocateTempInputTensor(node, kSubInputTensor1); - TF_LITE_ENSURE(context, input1 != nullptr); - TfLiteTensor* input2 = - micro_context->AllocateTempInputTensor(node, kSubInputTensor2); - TF_LITE_ENSURE(context, input2 != nullptr); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kSubOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - - TF_LITE_ENSURE_STATUS( - CalculateOpDataSub(context, params, input1, input2, output, 
data)); - - micro_context->DeallocateTempTfLiteTensor(input1); - micro_context->DeallocateTempTfLiteTensor(input2); - micro_context->DeallocateTempTfLiteTensor(output); - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/svdf.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/svdf.cc deleted file mode 100644 index 5994db94..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/svdf.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/kernels/svdf.h" - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/activation_utils.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { -namespace { - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpDataSvdf)); -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->builtin_data); - TFLITE_DCHECK(node->user_data != nullptr); - const OpDataSvdf& data = *(static_cast(node->user_data)); - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kSvdfInputTensor); - const TfLiteEvalTensor* weights_feature = - tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor); - const TfLiteEvalTensor* weights_time = - tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor); - const TfLiteEvalTensor* bias = - (NumInputs(node) == 5) - ? 
tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor) - : nullptr; - TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput( - context, node, kSvdfInputActivationStateTensor); - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor); - - switch (weights_feature->type) { - case kTfLiteFloat32: { - EvalFloatSvdfReference( - context, node, input, weights_feature, weights_time, bias, params, - data.scratch_tensor_index, activation_state, output); - return kTfLiteOk; - break; - } - - case kTfLiteInt8: { - switch (weights_time->type) { - case kTfLiteInt16: { - EvalInt16SvdfReference(context, node, input, weights_feature, - weights_time, bias, params, activation_state, - output, data); - return kTfLiteOk; - break; - } - case kTfLiteInt8: { - EvalInt8SvdfReference(context, node, input, weights_feature, - weights_time, bias, params, activation_state, - output, data); - return kTfLiteOk; - break; - } - default: - MicroPrintf("Type %s not currently supported.", - TfLiteTypeGetName(weights_time->type)); - return kTfLiteError; - } - } - - default: - MicroPrintf("Type %s not currently supported.", - TfLiteTypeGetName(weights_feature->type)); - return kTfLiteError; - } - return kTfLiteOk; -} - -} // namespace - -TfLiteRegistration Register_SVDF() { - return tflite::micro::RegisterOp(Init, PrepareSvdf, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/svdf_common.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/svdf_common.cc deleted file mode 100644 index fb92b4fd..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/svdf_common.cc +++ /dev/null @@ -1,514 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/kernels/activation_utils.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/svdf.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { - -/** - * This version of SVDF is specific to TFLite Micro. It contains the following - * differences between the TFLite version: - * - * 1.) Scratch tensor allocation - scratch tensors must be known ahead of time - * for the Micro interpreter. - * 2.) Output dimensions - the TFLite version determines output size and runtime - * and resizes the output tensor. Micro runtime does not support tensor - * resizing. - */ - -const int kSvdfInputTensor = 0; -const int kSvdfWeightsFeatureTensor = 1; -const int kSvdfWeightsTimeTensor = 2; -const int kSvdfBiasTensor = 3; -const int kSvdfInputActivationStateTensor = - 4; // This is a variable tensor, and will be modified by this op. 
-const int kSvdfOutputTensor = 0; - -template -void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node, - const TfLiteEvalTensor* input_tensor, - const TfLiteEvalTensor* weights_feature_tensor, - const TfLiteEvalTensor* weights_time_tensor, - const TfLiteEvalTensor* bias_tensor, - const TfLiteSVDFParams* params, - TfLiteEvalTensor* activation_state_tensor, - TfLiteEvalTensor* output_tensor, - const OpDataSvdf& data) { - const int n_rank = params->rank; - const int n_batch = input_tensor->dims->data[0]; - const int n_input = input_tensor->dims->data[1]; - const int n_filter = weights_feature_tensor->dims->data[0]; - const int n_unit = n_filter / n_rank; - const int n_memory = weights_time_tensor->dims->data[1]; - - TFLITE_DCHECK(context != nullptr); - TFLITE_DCHECK(context->GetScratchBuffer != nullptr); - - int32_t* scratch_tensor = static_cast( - context->GetScratchBuffer(context, data.scratch_tensor_index)); - int32_t* scratch_output_tensor = static_cast( - context->GetScratchBuffer(context, data.scratch_output_tensor_index)); - - // Shift states. - T* const state_ptr = tflite::micro::GetTensorData(activation_state_tensor); - - // Left shift the activation_state. - { - T* new_state_start = state_ptr; - const T* old_state_start = state_ptr + 1; - const T* old_state_end = state_ptr + n_batch * n_filter * n_memory; - while (old_state_start != old_state_end) { - *new_state_start++ = *old_state_start++; - } - } - - // Note: no need to clear the latest activation, matmul is not accumulative. - - // Feature matmul. 
- { - T* state = tflite::micro::GetTensorData(activation_state_tensor); - const int8_t* input = tflite::micro::GetTensorData(input_tensor); - const int8_t* weight_feature = - tflite::micro::GetTensorData(weights_feature_tensor); - const int32_t output_max = std::numeric_limits::max(); - const int32_t output_min = std::numeric_limits::min(); - T* result_in_batch = state + (n_memory - 1); - for (int b = 0; b < n_batch; b++) { - const int8_t* matrix_ptr = weight_feature; - for (int r = 0; r < n_filter; r++) { - int32_t dot_prod = 0; - const int8_t* vector_in_batch = input + b * n_input; - for (int c = 0; c < n_input; c++) { - dot_prod += - *matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point); - } - dot_prod = MultiplyByQuantizedMultiplier( - dot_prod, data.effective_scale_1_a, data.effective_scale_1_b); - dot_prod = std::min(std::max(output_min, dot_prod), output_max); - // The int16 version of the op assumes a zero_point of 0. This - // code accounts for the potentially non-zero zero_point for the int8 - // version of the op. - *result_in_batch = data.activation_state_zero_point + dot_prod; - result_in_batch += n_memory; - } - } - } - - // Time. - { - for (int b = 0; b < n_batch; ++b) { - int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter; - - // Perform batched vector dot product: - const T* vector1_ptr = - tflite::micro::GetTensorData(weights_time_tensor); - const T* vector2_ptr = - tflite::micro::GetTensorData(activation_state_tensor) + - b * n_memory * n_filter; - - for (int i = 0; i < n_filter; i++) { - *scratch_ptr_batch = 0; - for (int j = 0; j < n_memory; j++) { - *scratch_ptr_batch += - *vector1_ptr++ * - (*vector2_ptr++ - data.activation_state_zero_point); - } - scratch_ptr_batch++; - } - } - } - - // Reduce, add bias, rescale, activation. - { - // Add bias. 
- if (bias_tensor) { - // Vector batch assign: - const int32_t* bias_data = - tflite::micro::GetTensorData(bias_tensor); - for (int i = 0; i < n_batch; ++i) { - int32_t* output_ptr = scratch_output_tensor + i * n_unit; - const int32_t* bias_ptr = bias_data; - for (int j = 0; j < n_unit; ++j) { - *output_ptr++ = *bias_ptr++; - } - } - } else { - int32_t* output_ptr = scratch_output_tensor; - for (int i = 0; i < n_batch * n_unit; ++i) { - *output_ptr++ = 0; - } - } - - // Reduce. - for (int b = 0; b < n_batch; ++b) { - int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit; - int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter; - - // Reduction sum vector - for (int i = 0; i < n_unit; ++i) { - for (int j = 0; j < n_rank; ++j) { - output_temp_ptr[i] += *scratch_ptr_batch++; - } - } - } - - // Rescale. - const int32_t output_max = std::numeric_limits::max(); - const int32_t output_min = std::numeric_limits::min(); - for (int i = 0; i < n_batch * n_unit; ++i) { - int32_t x1 = scratch_output_tensor[i]; - int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a, - data.effective_scale_2_b); - int32_t x3 = x2 + data.output_zero_point; - int32_t x4 = std::min(std::max(output_min, x3), output_max); - tflite::micro::GetTensorData(output_tensor)[i] = - static_cast(x4); - } - } -} - -/** - * Generate two versions of the integer code. One with int16_t type for the - * time weights and the activation state, and another one with int8_t for the - * same. 
- */ - -void EvalInt16SvdfReference(TfLiteContext* context, TfLiteNode* node, - const TfLiteEvalTensor* input_tensor, - const TfLiteEvalTensor* weights_feature_tensor, - const TfLiteEvalTensor* weights_time_tensor, - const TfLiteEvalTensor* bias_tensor, - const TfLiteSVDFParams* params, - TfLiteEvalTensor* activation_state_tensor, - TfLiteEvalTensor* output_tensor, - const OpDataSvdf& data) { - EvalIntegerSvdfReference( - context, node, input_tensor, weights_feature_tensor, weights_time_tensor, - bias_tensor, params, activation_state_tensor, output_tensor, data); -} - -void EvalInt8SvdfReference(TfLiteContext* context, TfLiteNode* node, - const TfLiteEvalTensor* input_tensor, - const TfLiteEvalTensor* weights_feature_tensor, - const TfLiteEvalTensor* weights_time_tensor, - const TfLiteEvalTensor* bias_tensor, - const TfLiteSVDFParams* params, - TfLiteEvalTensor* activation_state_tensor, - TfLiteEvalTensor* output_tensor, - const OpDataSvdf& data) { - EvalIntegerSvdfReference( - context, node, input_tensor, weights_feature_tensor, weights_time_tensor, - bias_tensor, params, activation_state_tensor, output_tensor, data); -} - -static inline void ApplyTimeWeightsBiasAndActivation( - int batch_size, int memory_size, int num_filters, int num_units, int rank, - const float* const weights_time_ptr, const float* const bias_ptr, - TfLiteFusedActivation activation, float* const state_ptr, - float* const scratch_ptr, float* const output_ptr) { - // Compute matmul(activation_state, weights_time). 
- for (int b = 0; b < batch_size; ++b) { - // Perform batched vector dot product: - float* scratch_ptr_batch = scratch_ptr + b * num_filters; - const float* vector1_ptr = weights_time_ptr; - const float* vector2_ptr = state_ptr + b * memory_size * num_filters; - for (int i = 0; i < num_filters; ++i) { - *scratch_ptr_batch = 0.f; - for (int j = 0; j < memory_size; ++j) { - *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++; - } - scratch_ptr_batch++; - } - } - - // Initialize output with bias if provided. - if (bias_ptr) { - // VectorBatchVectorAssign - for (int i = 0; i < batch_size; ++i) { - float* output_data = output_ptr + i * num_units; - const float* bias_data = bias_ptr; - for (int j = 0; j < num_units; ++j) { - *output_data++ = *bias_data++; - } - } - } else { - float* output_data = output_ptr; - for (int i = 0; i < batch_size * num_units; ++i) { - *output_data++ = 0.0f; - } - } - - // Reduction sum. - for (int b = 0; b < batch_size; ++b) { - float* output_ptr_batch = output_ptr + b * num_units; - float* scratch_ptr_batch = scratch_ptr + b * num_filters; - - // Reduction sum vector - for (int i = 0; i < num_units; ++i) { - for (int j = 0; j < rank; j++) { - output_ptr_batch[i] += *scratch_ptr_batch++; - } - } - } - - // Apply activation. 
- for (int b = 0; b < batch_size; ++b) { - float* output_ptr_batch = output_ptr + b * num_units; - for (int i = 0; i < num_units; ++i) { - *output_ptr_batch = - tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch); - ++output_ptr_batch; - } - } -} - -void EvalFloatSvdfReference( - TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input, - const TfLiteEvalTensor* weights_feature, - const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias, - const TfLiteSVDFParams* params, int scratch_tensor_index, - TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) { - const int rank = params->rank; - const int batch_size = input->dims->data[0]; - const int input_size = input->dims->data[1]; - const int num_filters = weights_feature->dims->data[0]; - const int num_units = num_filters / rank; - const int memory_size = weights_time->dims->data[1]; - - const float* weights_feature_ptr = - tflite::micro::GetTensorData(weights_feature); - const float* weights_time_ptr = - tflite::micro::GetTensorData(weights_time); - const float* bias_ptr = tflite::micro::GetTensorData(bias); - const float* input_ptr = tflite::micro::GetTensorData(input); - - float* state_ptr = tflite::micro::GetTensorData(activation_state); - - TFLITE_DCHECK(context != nullptr); - TFLITE_DCHECK(context->GetScratchBuffer != nullptr); - - float* scratch_ptr = static_cast( - context->GetScratchBuffer(context, scratch_tensor_index)); - - float* output_ptr = tflite::micro::GetTensorData(output); - - // Left shift the activation_state. - { - float* new_state_start = state_ptr; - const float* old_state_start = state_ptr + 1; - const float* old_state_end = - state_ptr + batch_size * num_filters * memory_size; - while (old_state_start != old_state_end) { - *new_state_start++ = *old_state_start++; - } - } - - // Note: no need to clear the latest activation, matmul is not accumulative. - - // Compute conv1d(inputs, weights_feature). 
- // The activation_state's rightmost column is used to save current cycle - // activation. This is achieved by starting at state_ptr[memory_size - 1] and - // having the stride equal to memory_size. - - // Perform batched matrix vector multiply operation: - { - const float* matrix = weights_feature_ptr; - const float* vector = input_ptr; - float* result = &state_ptr[memory_size - 1]; - float* result_in_batch = result; - for (int i = 0; i < batch_size; ++i) { - const float* matrix_ptr = matrix; - for (int j = 0; j < num_filters; ++j) { - float dot_prod = 0.0f; - const float* vector_in_batch = vector + i * input_size; - for (int k = 0; k < input_size; ++k) { - dot_prod += *matrix_ptr++ * *vector_in_batch++; - } - *result_in_batch = dot_prod; - result_in_batch += memory_size; - } - } - } - - ApplyTimeWeightsBiasAndActivation( - batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr, - bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr); -} - -TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) { - TFLITE_DCHECK(node->builtin_data != nullptr); - - const auto* params = static_cast(node->builtin_data); - - MicroContext* micro_context = GetMicroContext(context); - - // Validate Tensor Inputs (dtype depends on quantization): - // [0] = Input, {2, batch_size, input_size} - // [1] = Weights Feature, {2, num_filters, input_size} - // [2] = Weights Time, {2, num_filters, memory_size} - // [3] = Bias (optional), {1, num_units} - // [4] = Activation State (variable), - // {2, batch_size, memory_size * num_filters} - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kSvdfInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - TfLiteTensor* weights_feature = - micro_context->AllocateTempInputTensor(node, kSvdfWeightsFeatureTensor); - TF_LITE_ENSURE(context, weights_feature != nullptr); - TfLiteTensor* weights_time = - micro_context->AllocateTempInputTensor(node, kSvdfWeightsTimeTensor); - 
TF_LITE_ENSURE(context, weights_time != nullptr); - TfLiteTensor* bias = - micro_context->AllocateTempInputTensor(node, kSvdfBiasTensor); - TfLiteTensor* activation_state = micro_context->AllocateTempInputTensor( - node, kSvdfInputActivationStateTensor); - TF_LITE_ENSURE(context, activation_state != nullptr); - - // Define input constants based on input tensor definition above: - const int rank = params->rank; - const int input_size = input->dims->data[1]; - const int batch_size = input->dims->data[0]; - const int num_filters = weights_feature->dims->data[0]; - TF_LITE_ENSURE_EQ(context, num_filters % rank, 0); - const int num_units = num_filters / rank; - const int memory_size = weights_time->dims->data[1]; - - // Validate Input Tensor: - TF_LITE_ENSURE(context, - input->type == kTfLiteFloat32 || input->type == kTfLiteInt8); - TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); - - // Validate Tensor Output: - // [0] = float/int8_t, {2, batch_size, num_units} - TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kSvdfOutputTensor); - TF_LITE_ENSURE(context, output != nullptr); - TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2); - TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size); - TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units); - - // Validate Weights Feature Input Tensor: - TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2); - TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size); - - // Validate Weights Time Input Tensor: - TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2); - TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters); - TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size); - - // Validate Optional Bias Input Tensor: - if (bias != nullptr) { - TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units); - } - - // Validate Activation State Input Tensor: - TF_LITE_ENSURE_EQ(context, 
NumDimensions(activation_state), 2); - TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size); - TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1], - memory_size * num_filters); - // Since is_variable is not part of TFLiteEvalTensor, check is_variable here. - TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true); - - TF_LITE_ENSURE_EQ(context, node->inputs->size, 5); - - TFLITE_DCHECK(node->user_data != nullptr); - OpDataSvdf* data = static_cast(node->user_data); - - if (input->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8); - TF_LITE_ENSURE(context, (weights_time->type == kTfLiteInt16) || - (weights_time->type == kTfLiteInt8)); - TF_LITE_ENSURE(context, (activation_state->type == kTfLiteInt16) || - (activation_state->type == kTfLiteInt8)); - if (bias != nullptr) { - TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32); - } - - TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); - - const double effective_scale_1 = static_cast( - input->params.scale * weights_feature->params.scale / - activation_state->params.scale); - const double effective_scale_2 = - static_cast(activation_state->params.scale * - weights_time->params.scale / output->params.scale); - - // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready. 
- TF_LITE_ENSURE( - context, - std::abs(static_cast(bias->params.scale) - - static_cast(activation_state->params.scale * - weights_time->params.scale)) < 1e-5); - - QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a), - &(data->effective_scale_1_b)); - QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a), - &(data->effective_scale_2_b)); - - data->input_zero_point = input->params.zero_point; - data->output_zero_point = output->params.zero_point; - data->activation_state_zero_point = activation_state->params.zero_point; - - TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr); - - const TfLiteStatus scratch_status = context->RequestScratchBufferInArena( - context, batch_size * num_filters * sizeof(int32_t), - &(data->scratch_tensor_index)); - TF_LITE_ENSURE_OK(context, scratch_status); - - const TfLiteStatus scratch_output_status = - context->RequestScratchBufferInArena( - context, batch_size * num_units * sizeof(int32_t), - &(data->scratch_output_tensor_index)); - TF_LITE_ENSURE_OK(context, scratch_output_status); - } else { - TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32); - TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32); - TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32); - if (bias != nullptr) { - TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32); - } - TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32); - - TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr); - const TfLiteStatus scratch_status = context->RequestScratchBufferInArena( - context, batch_size * num_filters * sizeof(float), - &(data->scratch_tensor_index)); - TF_LITE_ENSURE_OK(context, scratch_status); - } - - micro_context->DeallocateTempTfLiteTensor(input); - micro_context->DeallocateTempTfLiteTensor(weights_feature); - micro_context->DeallocateTempTfLiteTensor(weights_time); - micro_context->DeallocateTempTfLiteTensor(activation_state); - 
micro_context->DeallocateTempTfLiteTensor(output); - micro_context->DeallocateTempTfLiteTensor(bias); - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc deleted file mode 100644 index 7f3c50e4..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc +++ /dev/null @@ -1,1696 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/quantization_util.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/lstm_eval.h" -#include "tensorflow/lite/micro/kernels/lstm_shared.h" -#include "tensorflow/lite/micro/kernels/micro_tensor_utils.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -namespace { - -constexpr int scratch_index_size = 12; - -struct UnidirectionalSequenceLstmOpData { - // If the lstm is layer norm. 
- bool use_layer_norm; - // The scratch index. - int scratch_index[scratch_index_size]; - - int32_t row_sums_size; - int32_t* row_sums; - bool compute_row_sums = false; - - int32_t input_zero_point; - int32_t output_state_zero_point; - - IntegerLstmParameter integer_lstm_param; - HybridLstmScales hybrid_lstm_scales; -}; - -TfLiteStatus PopulateQuantizedLstmParams8x8_16( - TfLiteContext* context, TfLiteNode* node, - IntegerLstmParameter* integer_lstm_param) { - MicroContext* micro_context = GetMicroContext(context); - - // Calculate quantized clip for projection and cell. - const auto* params = - static_cast(node->builtin_data); - const float cell_clip = params->cell_clip; - const float proj_clip = params->proj_clip; - - TfLiteTensor* cell_state = - micro_context->AllocateTempInputTensor(node, kLstmCellStateTensor); - TF_LITE_ENSURE(context, cell_state != nullptr); - TF_LITE_ENSURE(context, cell_state->is_variable); - TfLiteTensor* output_tensor = - micro_context->AllocateTempOutputTensor(node, kLstmOutputTensor); - - TF_LITE_ENSURE(context, - cell_state->quantization.type != kTfLiteNoQuantization); - auto* cell_state_params = - static_cast(cell_state->quantization.params); - TF_LITE_ENSURE(context, - output_tensor->quantization.type != kTfLiteNoQuantization); - auto* proj_params = static_cast( - output_tensor->quantization.params); - if (cell_clip > 0.0f) { - integer_lstm_param->quantized_cell_clip = static_cast(std::min( - std::max(cell_clip / cell_state_params->scale->data[0], -32768.0f), - 32767.0f)); - } else { - integer_lstm_param->quantized_cell_clip = 0; - } - if (proj_clip > 0.0f) { - integer_lstm_param->quantized_proj_clip = static_cast(std::min( - std::max(proj_clip / proj_params->scale->data[0], -128.0f), 127.0f)); - } else { - integer_lstm_param->quantized_proj_clip = 0; - } - - // Calculate effective scales. 
- UnidirectionalSequenceLstmOpData* op_data = - static_cast(node->user_data); - const bool use_layer_norm = op_data->use_layer_norm; - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - - TfLiteTensor* input_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmInputLayerNormCoefficientsTensor); - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - TfLiteTensor* cell_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, 
kLstmCellLayerNormCoefficientsTensor); - TfLiteTensor* output_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmOutputLayerNormCoefficientsTensor); - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - - // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. - const bool use_cifg = (input_to_input_weights == nullptr); - const bool use_peephole = (cell_to_output_weights != nullptr); - const bool use_projection = (projection_weights != nullptr); - - // Get intermediate scales and zero points. - float intermediate_scale[5]; - int32_t intermediate_zp[5]; - for (int i = 0; i < 4; ++i) { - if (use_layer_norm) { - TfLiteTensor* intermediate = - micro_context->AllocateTempIntermediateTensor(node, i); - TF_LITE_ENSURE(context, - intermediate->quantization.type != kTfLiteNoQuantization); - auto* params_intermediate = static_cast( - intermediate->quantization.params); - intermediate_scale[i] = params_intermediate->scale->data[0]; - intermediate_zp[i] = params_intermediate->zero_point->data[0]; - if (intermediate != nullptr) { - micro_context->DeallocateTempTfLiteTensor(intermediate); - } - } else { - // Q3.12 for activation functions. - intermediate_scale[i] = std::pow(2.0f, -12.0f); - intermediate_zp[i] = 0; - } - } - // In the absence of projection, hidden becomes otuput and this intermediate - // is ignored. 
- TfLiteTensor* hidden = micro_context->AllocateTempIntermediateTensor(node, 4); - TF_LITE_ENSURE(context, hidden->quantization.type != kTfLiteNoQuantization); - auto* hidden_params = - static_cast(hidden->quantization.params); - intermediate_scale[4] = hidden_params->scale->data[0]; - intermediate_zp[4] = hidden_params->zero_point->data[0]; - if (hidden != nullptr) { - micro_context->DeallocateTempTfLiteTensor(hidden); - } - - // Scales. - const float default_scale = 1.0; - float input_scale = default_scale; - float input_to_input_weight_scale = default_scale; - float recurrent_to_input_weight_scale = default_scale; - float cell_to_input_weight_scale = default_scale; - float input_to_forget_weight_scale = default_scale; - float recurrent_to_forget_weight_scale = default_scale; - float cell_to_forget_weight_scale = default_scale; - float input_to_cell_weight_scale = default_scale; - float recurrent_to_cell_weight_scale = default_scale; - float input_to_output_weight_scale = default_scale; - float recurrent_to_output_weight_scale = default_scale; - float cell_to_output_weight_scale = default_scale; - float projection_weight_scale = default_scale; - float layer_norm_input_scale = default_scale; - float layer_norm_forget_scale = default_scale; - float layer_norm_cell_scale = default_scale; - float layer_norm_output_scale = default_scale; - float output_state_scale = default_scale; - int cell_scale = 1; - - // Effective scales. 
- float effective_input_to_input_scale = default_scale; - float effective_recurrent_to_input_scale = default_scale; - float effective_cell_to_input_scale = default_scale; - float effective_input_to_forget_scale = default_scale; - float effective_recurrent_to_forget_scale = default_scale; - float effective_cell_to_forget_scale = default_scale; - float effective_input_to_cell_scale = default_scale; - float effective_recurrent_to_cell_scale = default_scale; - float effective_input_to_output_scale = default_scale; - float effective_recurrent_to_output_scale = default_scale; - float effective_cell_to_output_scale = default_scale; - float effective_proj_scale = default_scale; - float effective_hidden_scale = default_scale; - - // Populate scales. - if (!use_cifg) { - input_to_input_weight_scale = input_to_input_weights->params.scale; - recurrent_to_input_weight_scale = recurrent_to_input_weights->params.scale; - } - - if (use_peephole) { - if (!use_cifg) { - cell_to_input_weight_scale = cell_to_input_weights->params.scale; - } - cell_to_forget_weight_scale = cell_to_forget_weights->params.scale; - cell_to_output_weight_scale = cell_to_output_weights->params.scale; - } - - if (use_layer_norm) { - if (!use_cifg) { - layer_norm_input_scale = input_layer_norm_coefficients->params.scale; - } - layer_norm_forget_scale = forget_layer_norm_coefficients->params.scale; - layer_norm_cell_scale = cell_layer_norm_coefficients->params.scale; - layer_norm_output_scale = output_layer_norm_coefficients->params.scale; - } - - if (use_projection) { - projection_weight_scale = projection_weights->params.scale; - } - output_state_scale = output_state->params.scale; - - input_to_forget_weight_scale = input_to_forget_weights->params.scale; - input_to_cell_weight_scale = input_to_cell_weights->params.scale; - input_to_output_weight_scale = input_to_output_weights->params.scale; - recurrent_to_forget_weight_scale = recurrent_to_forget_weights->params.scale; - recurrent_to_cell_weight_scale = 
recurrent_to_cell_weights->params.scale; - recurrent_to_output_weight_scale = recurrent_to_output_weights->params.scale; - - // Check cell state (already used above) - TF_LITE_ENSURE(context, CheckedLog2(cell_state->params.scale, &cell_scale)); - // TF_LITE_ENSURE(context, cell_scale <= -9); - integer_lstm_param->cell_scale = cell_scale; - input_scale = input->params.scale; - - // Calculate effective scales. - if (!use_cifg) { - effective_input_to_input_scale = - input_to_input_weight_scale * input_scale / intermediate_scale[0]; - effective_recurrent_to_input_scale = recurrent_to_input_weight_scale * - output_state_scale / - intermediate_scale[0]; - } - effective_input_to_forget_scale = - input_to_forget_weight_scale * input_scale / intermediate_scale[1]; - effective_recurrent_to_forget_scale = recurrent_to_forget_weight_scale * - output_state_scale / - intermediate_scale[1]; - - effective_input_to_cell_scale = - input_to_cell_weight_scale * input_scale / intermediate_scale[2]; - effective_recurrent_to_cell_scale = recurrent_to_cell_weight_scale * - output_state_scale / - intermediate_scale[2]; - - effective_input_to_output_scale = - input_to_output_weight_scale * input_scale / intermediate_scale[3]; - effective_recurrent_to_output_scale = recurrent_to_output_weight_scale * - output_state_scale / - intermediate_scale[3]; - - effective_hidden_scale = - std::pow(2.0f, -15.0f) / intermediate_scale[4] * std::pow(2.0f, -15.0f); - - effective_proj_scale = - projection_weight_scale * intermediate_scale[4] / output_state_scale; - - if (use_peephole) { - if (!use_cifg) { - effective_cell_to_input_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_input_weight_scale / intermediate_scale[0]; - } - effective_cell_to_forget_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_forget_weight_scale / intermediate_scale[1]; - effective_cell_to_output_scale = - std::pow(2.0f, static_cast(cell_scale)) * - cell_to_output_weight_scale / intermediate_scale[3]; - } - 
- // Decompose scales. - int shift_output; - QuantizeMultiplier(static_cast(effective_input_to_input_scale), - &integer_lstm_param->effective_input_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_input_scale), - &integer_lstm_param->effective_recurrent_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_input_scale), - &integer_lstm_param->effective_cell_to_input_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_forget_scale), - &integer_lstm_param->effective_input_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_forget_scale), - &integer_lstm_param->effective_recurrent_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_forget_scale), - &integer_lstm_param->effective_cell_to_forget_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_cell_scale), - &integer_lstm_param->effective_input_to_cell_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_cell_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_cell_scale), - &integer_lstm_param->effective_recurrent_to_cell_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_cell_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_input_to_output_scale), - 
&integer_lstm_param->effective_input_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_input_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_recurrent_to_output_scale), - &integer_lstm_param->effective_recurrent_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_recurrent_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_cell_to_output_scale), - &integer_lstm_param->effective_cell_to_output_scale_a, - &shift_output); - integer_lstm_param->effective_cell_to_output_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_proj_scale), - &integer_lstm_param->effective_proj_scale_a, - &shift_output); - integer_lstm_param->effective_proj_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(effective_hidden_scale), - &integer_lstm_param->effective_hidden_scale_a, - &shift_output); - integer_lstm_param->effective_hidden_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_input_scale), - &integer_lstm_param->layer_norm_input_scale_a, - &shift_output); - integer_lstm_param->layer_norm_input_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_forget_scale), - &integer_lstm_param->layer_norm_forget_scale_a, - &shift_output); - integer_lstm_param->layer_norm_forget_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_cell_scale), - &integer_lstm_param->layer_norm_cell_scale_a, - &shift_output); - integer_lstm_param->layer_norm_cell_scale_b = - static_cast(shift_output); - QuantizeMultiplier(static_cast(layer_norm_output_scale), - &integer_lstm_param->layer_norm_output_scale_a, - &shift_output); - integer_lstm_param->layer_norm_output_scale_b = - static_cast(shift_output); - - integer_lstm_param->hidden_zp = intermediate_zp[4]; - - // 10000 is used to make sure the kernel logic does not overflow. 
- if (!use_cifg) { - integer_lstm_param->input_variance_guard = - std::max(1, static_cast(10000 * layer_norm_input_scale)); - } - integer_lstm_param->forget_variance_guard = - std::max(1, static_cast(10000 * layer_norm_forget_scale)); - integer_lstm_param->cell_variance_guard = - std::max(1, static_cast(10000 * layer_norm_cell_scale)); - integer_lstm_param->output_variance_guard = - std::max(1, static_cast(10000 * layer_norm_output_scale)); - - if (cell_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_state); - } - if (output_tensor != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_tensor); - } - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - if (cell_to_output_weights != nullptr) { - 
micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - if (input_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_layer_norm_coefficients); - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - if (cell_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_layer_norm_coefficients); - } - if (output_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_layer_norm_coefficients); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - - return kTfLiteOk; -} - -// Temporary buffers used for hybrid mode -enum HybridTempBuffer { - kPrimaryScratchBuffer = 0, - kInputQuantized = 1, - kOutputStateQuantized = 2, - kCellStateQuantized = 3, - kInputScalingFactors = 4, - kOutputStateScalingFactors = 5, - kProductScalingFactors = 6, - kRecoveredCellWeights = 7, - kAccumScratch = 8, - kInputZeroPoints = 9, - kOutputStateZeroPoints = 10, - kScales = 11, - kNumHybridTempBuffers = 12, -}; - -void* UnidirectionalSequenceLstmInit(TfLiteContext* context, const char* buffer, - size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer( - context, sizeof(UnidirectionalSequenceLstmOpData)); -} - -// Check that input tensor dimensions matches with each other. 
-TfLiteStatus SetHybridScales(TfLiteContext* context, TfLiteNode* node) { - UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast(node->user_data); - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - op_data->hybrid_lstm_scales.input_to_input_weights_scale = - (input_to_input_weights != nullptr) ? input_to_input_weights->params.scale - : 1.0f; - - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - op_data->hybrid_lstm_scales.input_to_forget_weights_scale = - (input_to_forget_weights != nullptr) - ? input_to_forget_weights->params.scale - : 1.0f; - - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - op_data->hybrid_lstm_scales.input_to_cell_weights_scale = - (input_to_cell_weights != nullptr) ? input_to_cell_weights->params.scale - : 1.0f; - - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - op_data->hybrid_lstm_scales.input_to_output_weights_scale = - (input_to_output_weights != nullptr) - ? input_to_output_weights->params.scale - : 1.0f; - - op_data->hybrid_lstm_scales.aux_input_to_input_weights_scale = 1.0f; - op_data->hybrid_lstm_scales.aux_input_to_forget_weights_scale = 1.0f; - op_data->hybrid_lstm_scales.aux_input_to_cell_weights_scale = 1.0f; - op_data->hybrid_lstm_scales.aux_input_to_output_weights_scale = 1.0f; - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - op_data->hybrid_lstm_scales.recurrent_to_input_weights_scale = - (recurrent_to_input_weights != nullptr) - ? 
recurrent_to_input_weights->params.scale - : 1.0f; - - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - op_data->hybrid_lstm_scales.recurrent_to_forget_weights_scale = - (recurrent_to_forget_weights != nullptr) - ? recurrent_to_forget_weights->params.scale - : 1.0f; - - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - op_data->hybrid_lstm_scales.recurrent_to_cell_weights_scale = - (recurrent_to_cell_weights != nullptr) - ? recurrent_to_cell_weights->params.scale - : 1.0f; - - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - op_data->hybrid_lstm_scales.recurrent_to_output_weights_scale = - (recurrent_to_output_weights != nullptr) - ? recurrent_to_output_weights->params.scale - : 1.0f; - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - op_data->hybrid_lstm_scales.cell_to_input_weights_scale = - (cell_to_input_weights != nullptr) ? cell_to_input_weights->params.scale - : 1.0f; - - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - op_data->hybrid_lstm_scales.cell_to_forget_weights_scale = - (cell_to_forget_weights != nullptr) ? cell_to_forget_weights->params.scale - : 1.0f; - - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - op_data->hybrid_lstm_scales.cell_to_output_weights_scale = - (cell_to_output_weights != nullptr) ? cell_to_output_weights->params.scale - : 1.0f; - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - op_data->hybrid_lstm_scales.projection_weights_scale = - (projection_weights != nullptr) ? 
projection_weights->params.scale : 1.0f; - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - - if (cell_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - - return kTfLiteOk; -} - -// Check that input tensor dimensions matches with each other. -TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, - TfLiteNode* node, int n_input, - int n_output, int n_cell, - bool use_layer_norm, bool is_integer) { - MicroContext* micro_context = GetMicroContext(context); - - const auto* params = reinterpret_cast(node->builtin_data); - - // Making sure clipping parameters have valid values. 
- // == 0 means no clipping - // > 0 means clipping - TF_LITE_ENSURE(context, params->cell_clip >= 0); - TF_LITE_ENSURE(context, params->proj_clip >= 0); - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - if (input_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input); - } - - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); - - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - if (recurrent_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0], - n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[1], - n_output); - } - - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[0], - n_cell); - TF_LITE_ENSURE_EQ(context, 
recurrent_to_forget_weights->dims->data[1], - n_output); - - TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1], - n_output); - - // We make sure the input-gate's parameters are either both present (regular - // LSTM) or not at all (CIFG-LSTM). - const bool cifg_weights_all_or_none = - ((input_to_input_weights != nullptr) && - (recurrent_to_input_weights != nullptr)) || - ((input_to_input_weights == nullptr) && - (recurrent_to_input_weights == nullptr)); - TF_LITE_ENSURE(context, cifg_weights_all_or_none == true); - - TfLiteTensor* cell_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToInputWeightsTensor); - if (cell_to_input_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_input_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - TfLiteTensor* cell_to_forget_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToForgetWeightsTensor); - if (cell_to_forget_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_forget_weights->type, - is_integer ? 
kTfLiteInt16 : input_to_forget_weights->type); - } - - TfLiteTensor* cell_to_output_weights = micro_context->AllocateTempInputTensor( - node, kLstmCellToOutputWeightsTensor); - if (cell_to_output_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_TYPES_EQ( - context, cell_to_output_weights->type, - is_integer ? kTfLiteInt16 : input_to_forget_weights->type); - } - - // Making sure the peephole weights are there all or none. - const bool use_cifg = (input_to_input_weights == nullptr); - const bool peephole_weights_all_or_none = - ((cell_to_input_weights != nullptr || use_cifg) && - (cell_to_forget_weights != nullptr) && - (cell_to_output_weights != nullptr)) || - ((cell_to_input_weights == nullptr) && - (cell_to_forget_weights == nullptr) && - (cell_to_output_weights == nullptr)); - TF_LITE_ENSURE(context, peephole_weights_all_or_none == true); - - // Make sure the input gate bias is present only when not a CIFG-LSTM. 
- TfLiteTensor* input_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmInputGateBiasTensor); - if (use_cifg) { - TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr); - } else { - TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, input_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input_gate_bias->type, kTfLiteFloat32); - } - } - - TfLiteTensor* forget_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmForgetGateBiasTensor); - TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, forget_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, forget_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* cell_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmCellGateBiasTensor); - TF_LITE_ENSURE_EQ(context, cell_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, cell_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, cell_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* output_gate_bias = - micro_context->AllocateTempInputTensor(node, kLstmOutputGateBiasTensor); - TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, output_gate_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, output_gate_bias->type, kTfLiteFloat32); - } - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - if (projection_weights != nullptr) { - TF_LITE_ENSURE_EQ(context, 
projection_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output); - TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); - } - - TfLiteTensor* projection_bias = - micro_context->AllocateTempInputTensor(node, kLstmProjectionBiasTensor); - if (projection_bias != nullptr) { - TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); - TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, projection_bias->type, kTfLiteInt32); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, projection_bias->type, kTfLiteFloat32); - } - } - - // Making sure the projection tensors are consistent: - // 1) If projection weight is not present, then projection bias should not be - // present. - // 2) If projection weight is present, then projection bias is optional. - const bool projecton_tensors_consistent = - ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projecton_tensors_consistent == true); - - if (use_layer_norm) { - TfLiteTensor* input_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmInputLayerNormCoefficientsTensor); - if (use_cifg) { - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients, nullptr); - } else { - TF_LITE_ENSURE(context, input_layer_norm_coefficients != nullptr); - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, input_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, input_layer_norm_coefficients->type, - kTfLiteFloat32); - } - } - - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->size, 1); - 
TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, forget_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, forget_layer_norm_coefficients->type, - kTfLiteFloat32); - } - - TfLiteTensor* cell_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmCellLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, cell_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, cell_layer_norm_coefficients->type, - kTfLiteFloat32); - } - - TfLiteTensor* output_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmOutputLayerNormCoefficientsTensor); - TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->size, 1); - TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->data[0], - n_cell); - if (is_integer) { - TF_LITE_ENSURE_TYPES_EQ(context, output_layer_norm_coefficients->type, - kTfLiteInt16); - } else { - TF_LITE_ENSURE_TYPES_EQ(context, output_layer_norm_coefficients->type, - kTfLiteFloat32); - } - if (input_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_layer_norm_coefficients); - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - if (cell_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_layer_norm_coefficients); - } - if (output_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_layer_norm_coefficients); - } - } - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if 
(input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - if (cell_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_input_weights); - } - if (cell_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_forget_weights); - } - if (cell_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_to_output_weights); - } - if (input_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_gate_bias); - } - if (forget_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_gate_bias); - } - if (cell_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_gate_bias); - } - if (output_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_gate_bias); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (projection_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_bias); - } - - return kTfLiteOk; -} - -TfLiteStatus PrecomputeZeroPointTimesWeightWithBias( - TfLiteContext* context, int32_t zero_point, - const TfLiteTensor* weight_tensor, const TfLiteTensor* bias_tensor, - int32_t** output) { - if (weight_tensor == nullptr) { - return kTfLiteOk; - } - - const RuntimeShape& weight_shape = GetTensorShape(weight_tensor); - TF_LITE_ENSURE_EQ(context, weight_shape.DimensionsCount(), 2); - const int row = weight_shape.Dims(0); - const int col = 
weight_shape.Dims(1); - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - *output = static_cast( - context->AllocatePersistentBuffer(context, row * sizeof(int32_t))); - - if (bias_tensor == nullptr) { - memset(*output, 0, row * sizeof(int32_t)); - } else { - const int32_t* bias = GetTensorData(bias_tensor); - memcpy(*output, bias, row * sizeof(int32_t)); - } - if (zero_point != 0) { - const int8_t* weight = GetTensorData(weight_tensor); - micro_tensor_utils::MatrixScalarMultiplyAccumulate(weight, zero_point, row, - col, *output); - } - return kTfLiteOk; -} - -TfLiteStatus PopulatePrecomputedZPTimesWeightsWithBias( - TfLiteContext* context, UnidirectionalSequenceLstmOpData* op_data, - TfLiteNode* node) { - MicroContext* micro_context = GetMicroContext(context); - - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - - const int32_t input_zero_point = -input->params.zero_point; - const int32_t output_state_zero_point = -output_state->params.zero_point; - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - TfLiteTensor* input_to_forget_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToForgetWeightsTensor); - TfLiteTensor* input_to_cell_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToCellWeightsTensor); - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - - TfLiteTensor* recurrent_to_input_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToInputWeightsTensor); - TfLiteTensor* recurrent_to_forget_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToForgetWeightsTensor); - 
TfLiteTensor* recurrent_to_cell_weights = - micro_context->AllocateTempInputTensor(node, - kLstmRecurrentToCellWeightsTensor); - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - TfLiteTensor* projection_bias = - micro_context->AllocateTempInputTensor(node, kLstmProjectionBiasTensor); - - IntegerLstmParameter* integer_lstm_params = &op_data->integer_lstm_param; - - TfLiteTensor* intermediate = - micro_context->AllocateTempIntermediateTensor(node, 4); - TF_LITE_ENSURE(context, - intermediate->quantization.type != kTfLiteNoQuantization); - const auto* params = - static_cast(intermediate->quantization.params); - const int32_t hidden_zp = params->zero_point->data[0]; - - // Get bias and perform zero point calculation. - // When there is layer normalization, the gate bias does not apply to matmul - // directly: - // y = ln(w * x + w * r + w * c) + b. - const bool is_layer_norm = op_data->use_layer_norm; - - // Forget gate. - TfLiteTensor* forget_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmForgetGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_forget_weights, forget_gate_bias, - &(integer_lstm_params->input_to_forget_effective_bias))); - - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_forget_weights, - nullptr, &(integer_lstm_params->recurrent_to_forget_effective_bias))); - - // Modulation gate. - TfLiteTensor* cell_gate_bias = is_layer_norm - ? 
nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmCellGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_cell_weights, cell_gate_bias, - &(integer_lstm_params->input_to_cell_effective_bias))); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_cell_weights, nullptr, - &(integer_lstm_params->recurrent_to_cell_effective_bias))); - - // Output gate. - TfLiteTensor* output_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmOutputGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_output_weights, output_gate_bias, - &(integer_lstm_params->input_to_output_effective_bias))); - - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_output_weights, - nullptr, &(integer_lstm_params->recurrent_to_output_effective_bias))); - - // Input gate. The calculation is only meaningful for non-cifg case. - TfLiteTensor* input_gate_bias = is_layer_norm - ? nullptr - : micro_context->AllocateTempInputTensor( - node, kLstmInputGateBiasTensor); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, input_zero_point, input_to_input_weights, input_gate_bias, - &(integer_lstm_params->input_to_input_effective_bias))); - TF_LITE_ENSURE_OK( - context, - PrecomputeZeroPointTimesWeightWithBias( - context, output_state_zero_point, recurrent_to_input_weights, nullptr, - &(integer_lstm_params->recurrent_to_input_effective_bias))); - - // Projection bias. The calculation is only meaningful for with projection. 
- TF_LITE_ENSURE_OK(context, - PrecomputeZeroPointTimesWeightWithBias( - context, hidden_zp, projection_weights, projection_bias, - &(integer_lstm_params->projection_effective_bias))); - - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - if (input_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_forget_weights); - } - if (input_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_cell_weights); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_input_weights); - } - if (recurrent_to_forget_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_forget_weights); - } - if (recurrent_to_cell_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_cell_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - if (projection_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_bias); - } - if (forget_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_gate_bias); - } - if (cell_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_gate_bias); - } - if (output_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_gate_bias); - } - if (input_gate_bias != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_gate_bias); - } - - if (intermediate != nullptr) { - 
micro_context->DeallocateTempTfLiteTensor(intermediate); - } - - return kTfLiteOk; -} - -// Resize the output and state tensors based on the sizes of the input tensors. -// Allocate a temporary scratch tensor. Also check that the sizes of the input -// tensors match each other. -TfLiteStatus UnidirectionalSequenceLstmPrepare(TfLiteContext* context, - TfLiteNode* node) { - UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast(node->user_data); - - MicroContext* micro_context = GetMicroContext(context); - - // Check we have all the inputs and outputs we need. - bool use_layer_norm = false; - if (node->inputs->size == 24) { - TfLiteTensor* forget_layer_norm_coefficients = - micro_context->AllocateTempInputTensor( - node, kLstmForgetLayerNormCoefficientsTensor); - if (forget_layer_norm_coefficients == nullptr) { - use_layer_norm = false; - } else { - use_layer_norm = true; - } - if (forget_layer_norm_coefficients != nullptr) { - micro_context->DeallocateTempTfLiteTensor(forget_layer_norm_coefficients); - } - } else if (node->inputs->size == 20) { - // This is deprecated and is only kept here for backward compatibility. - use_layer_norm = false; - } else { - MicroPrintf("The LSTM Full kernel expects 20 or 24 inputs. Got %d inputs", - node->inputs->size); - return kTfLiteError; - } - TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); - op_data->use_layer_norm = use_layer_norm; - - // Inferring batch size, number of outputs and sequence length and - // number of cells from the input tensors. - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kLstmInputTensor); - op_data->input_zero_point = input->params.zero_point; - const bool is_integer = input->type == kTfLiteInt8; - TF_LITE_ENSURE(context, input->dims->size > 1); - const auto* params = - reinterpret_cast( - node->builtin_data); - const bool time_major = params->time_major; - const int n_batch = time_major ? 
input->dims->data[1] : input->dims->data[0]; - const int n_input = input->dims->data[2]; - - TfLiteTensor* input_to_output_weights = - micro_context->AllocateTempInputTensor(node, - kLstmInputToOutputWeightsTensor); - const int n_cell = input_to_output_weights->dims->data[0]; - TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[1], n_input); - - TfLiteTensor* recurrent_to_output_weights = - micro_context->AllocateTempInputTensor( - node, kLstmRecurrentToOutputWeightsTensor); - TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, recurrent_to_output_weights->dims->data[0], - n_cell); - const int n_output = recurrent_to_output_weights->dims->data[1]; - - // Check that input tensor dimensions matches with each other. - TF_LITE_ENSURE_OK( - context, CheckInputTensorDimensions(context, node, n_input, n_output, - n_cell, use_layer_norm, is_integer)); - - // Get the pointer to output, output_state and cell_state buffer tensors. - TfLiteTensor* output = - micro_context->AllocateTempOutputTensor(node, kLstmOutputTensor); - - TfLiteTensor* output_state = - micro_context->AllocateTempInputTensor(node, kLstmOutputStateTensor); - TF_LITE_ENSURE(context, output_state != nullptr); - TF_LITE_ENSURE(context, output_state->is_variable); - op_data->output_state_zero_point = output_state->params.zero_point; - TfLiteTensor* cell_state = - micro_context->AllocateTempInputTensor(node, kLstmCellStateTensor); - TF_LITE_ENSURE(context, cell_state != nullptr); - TF_LITE_ENSURE(context, cell_state->is_variable); - - // Check the shape of input state tensors. - // These tensor may be 1D or 2D. It's fine as long as the total size is - // correct. 
- TF_LITE_ENSURE_EQ(context, NumElements(output_state), n_batch * n_output); - TF_LITE_ENSURE_EQ(context, NumElements(cell_state), n_batch * n_cell); - - // Check the shape of output tensor against that of input tensor - TF_LITE_ENSURE_EQ(context, output->dims->size, 3); - TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]); - TF_LITE_ENSURE_EQ(context, input->dims->data[1], output->dims->data[1]); - TF_LITE_ENSURE_EQ(context, output->dims->data[2], n_output); - - if (is_integer) { - const int num_intermediate_tensors = node->intermediates->size; - TF_LITE_ENSURE(context, num_intermediate_tensors == 5); - } - - TfLiteTensor* input_to_input_weights = micro_context->AllocateTempInputTensor( - node, kLstmInputToInputWeightsTensor); - - const bool use_cifg = (input_to_input_weights == nullptr); - - // Create a primary scratch buffer for hybrid and float - // If is_integer, primary scratch buffer has a different size - if (!is_integer) { - int scratch_buffer_size[2]; - scratch_buffer_size[0] = n_batch; - - if (use_cifg) { - // Reserving space for Cell, Forget, Output gates - scratch_buffer_size[1] = n_cell * 3; - } else { - // Reserving space for Input, Cell, Forget, Output gates - scratch_buffer_size[1] = n_cell * 4; - } - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, - scratch_buffer_size[0] * scratch_buffer_size[1] * - TfLiteTypeGetSize(input->type), - &(op_data->scratch_index[kPrimaryScratchBuffer]))); - } - - if (IsHybridOp(input, input_to_output_weights)) { - TF_LITE_ENSURE(context, kNumHybridTempBuffers <= scratch_index_size); - - TF_LITE_ENSURE_OK(context, SetHybridScales(context, node)); - - op_data->compute_row_sums = true; - - // Allocate temporary tensors to store quantized values of input, - // output_state and cell_state tensors. 
- - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, - GetTensorShape(input).FlatSize() * - TfLiteTypeGetSize(input_to_output_weights->type), - &(op_data->scratch_index[kInputQuantized]))); - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, - GetTensorShape(output_state).FlatSize() * - TfLiteTypeGetSize(input_to_output_weights->type), - &(op_data->scratch_index[kOutputStateQuantized]))); - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, - GetTensorShape(cell_state).FlatSize() * - TfLiteTypeGetSize(input_to_output_weights->type), - &(op_data->scratch_index[kCellStateQuantized]))); - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, n_batch * TfLiteTypeGetSize(kTfLiteFloat32), - &(op_data->scratch_index[kScales]))); - - // Allocate temporary buffers to store scaling factors and product scaling - // factors. The latter is a convenience storage which allows to quantize - // a vector once (which produces the scaling factors) and multiply it with - // different matrices (which requires multiplying the scaling factors with - // the scaling factor of the matrix). - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, n_batch * TfLiteTypeGetSize(kTfLiteFloat32), - &(op_data->scratch_index[kInputScalingFactors]))); - - TF_LITE_ENSURE_OK( - context, context->RequestScratchBufferInArena( - context, n_batch * TfLiteTypeGetSize(kTfLiteFloat32), - &(op_data->scratch_index[kOutputStateScalingFactors]))); - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, n_batch * TfLiteTypeGetSize(kTfLiteFloat32), - &(op_data->scratch_index[kProductScalingFactors]))); - - // Allocate a temporary buffer to store the recovered cell weights. Since - // this is used for diagonal matrices, only need to store n_cell values. 
- TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, n_cell * TfLiteTypeGetSize(kTfLiteFloat32), - &(op_data->scratch_index[kRecoveredCellWeights]))); - - // Allocate a temporary buffer to store the accumulated int32 values. - TF_LITE_ENSURE_OK( - context, - context->RequestScratchBufferInArena( - context, n_cell * n_batch * TfLiteTypeGetSize(kTfLiteInt32), - &(op_data->scratch_index[kAccumScratch]))); - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, n_batch * TfLiteTypeGetSize(kTfLiteFloat32), - &(op_data->scratch_index[kInputZeroPoints]))); - - TF_LITE_ENSURE_OK(context, - context->RequestScratchBufferInArena( - context, n_batch * TfLiteTypeGetSize(kTfLiteFloat32), - &(op_data->scratch_index[kOutputStateZeroPoints]))); - - int row_sums_rows = use_cifg ? 6 : 8; - TfLiteTensor* projection_weights = micro_context->AllocateTempInputTensor( - node, kLstmProjectionWeightsTensor); - if (projection_weights != nullptr) { - row_sums_rows += ceil(static_cast(n_output) / n_cell); - } - op_data->row_sums_size = row_sums_rows; - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - op_data->row_sums = static_cast(context->AllocatePersistentBuffer( - context, row_sums_rows * n_cell * sizeof(int32_t))); - if (projection_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(projection_weights); - } - } - - if (is_integer) { - // Integer UnidirectionalSequenceLSTM prepare function for 8x8->16. - // This code path needs 5 intermediate tensors per Op. - // Populate quantization parameters. - PopulateQuantizedLstmParams8x8_16(context, node, - &op_data->integer_lstm_param); - // Allocate scratch buffer. Need 4 16-bit buffer with size n_batch * n_cell - // and 1 8-bit buffer with size n_batch * n_cell. For integer - // UnidirectionalSequenceLSTM, we do not need the extra 32-bit buffer. 
- for (int i = 0; i < 5; ++i) { - TfLiteType buffer_type = kTfLiteInt16; - - if (i == 4) { - buffer_type = kTfLiteInt8; - } - - TF_LITE_ENSURE_OK( - context, - context->RequestScratchBufferInArena( - context, n_batch * n_cell * TfLiteTypeGetSize(buffer_type), - &(op_data->scratch_index[i]))); - } - - // Populate precomputed zp * weight. - TF_LITE_ENSURE_OK(context, PopulatePrecomputedZPTimesWeightsWithBias( - context, op_data, node)); - } - - if (input != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input); - } - if (input_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_output_weights); - } - if (recurrent_to_output_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(recurrent_to_output_weights); - } - if (output != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output); - } - if (output_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(output_state); - } - if (cell_state != nullptr) { - micro_context->DeallocateTempTfLiteTensor(cell_state); - } - - if (input_to_input_weights != nullptr) { - micro_context->DeallocateTempTfLiteTensor(input_to_input_weights); - } - return kTfLiteOk; -} - -TfLiteStatus UnidirectionalSequenceLstmEval(TfLiteContext* context, - TfLiteNode* node) { - TFLITE_DCHECK(context->GetScratchBuffer != nullptr); - - const auto* params = - reinterpret_cast( - node->builtin_data); - const UnidirectionalSequenceLstmOpData* op_data = - reinterpret_cast(node->user_data); - const bool use_layer_norm = op_data->use_layer_norm; - const bool time_major = params->time_major; - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kLstmInputTensor); - - const TfLiteEvalTensor* input_to_input_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToInputWeightsTensor); - - const TfLiteEvalTensor* input_to_forget_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToForgetWeightsTensor); - - const TfLiteEvalTensor* 
input_to_cell_weights = - tflite::micro::GetEvalInput(context, node, kLstmInputToCellWeightsTensor); - - const TfLiteEvalTensor* input_to_output_weights = tflite::micro::GetEvalInput( - context, node, kLstmInputToOutputWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_input_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToInputWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_forget_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToForgetWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_cell_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToCellWeightsTensor); - - const TfLiteEvalTensor* recurrent_to_output_weights = - tflite::micro::GetEvalInput(context, node, - kLstmRecurrentToOutputWeightsTensor); - - const TfLiteEvalTensor* cell_to_input_weights = - tflite::micro::GetEvalInput(context, node, kLstmCellToInputWeightsTensor); - - const TfLiteEvalTensor* cell_to_forget_weights = tflite::micro::GetEvalInput( - context, node, kLstmCellToForgetWeightsTensor); - - const TfLiteEvalTensor* cell_to_output_weights = tflite::micro::GetEvalInput( - context, node, kLstmCellToOutputWeightsTensor); - - const TfLiteEvalTensor* input_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmInputGateBiasTensor); - - const TfLiteEvalTensor* forget_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmForgetGateBiasTensor); - - const TfLiteEvalTensor* cell_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmCellGateBiasTensor); - - const TfLiteEvalTensor* output_gate_bias = - tflite::micro::GetEvalInput(context, node, kLstmOutputGateBiasTensor); - - const TfLiteEvalTensor* projection_weights = - tflite::micro::GetEvalInput(context, node, kLstmProjectionWeightsTensor); - - const TfLiteEvalTensor* projection_bias = - tflite::micro::GetEvalInput(context, node, kLstmProjectionBiasTensor); - - TfLiteEvalTensor* output_state = - tflite::micro::GetMutableEvalInput(context, 
node, kLstmOutputStateTensor); - - TfLiteEvalTensor* cell_state = - tflite::micro::GetMutableEvalInput(context, node, kLstmCellStateTensor); - - TFLITE_DCHECK(cell_state != nullptr); - - const TfLiteEvalTensor* input_layer_norm_coefficients = - use_layer_norm ? tflite::micro::GetEvalInput( - context, node, kLstmInputLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* forget_layer_norm_coefficients = - use_layer_norm - ? tflite::micro::GetEvalInput(context, node, - kLstmForgetLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* cell_layer_norm_coefficients = - use_layer_norm ? tflite::micro::GetEvalInput( - context, node, kLstmCellLayerNormCoefficientsTensor) - : nullptr; - const TfLiteEvalTensor* output_layer_norm_coefficients = - use_layer_norm - ? tflite::micro::GetEvalInput(context, node, - kLstmOutputLayerNormCoefficientsTensor) - : nullptr; - - TfLiteEvalTensor* output = - tflite::micro::GetEvalOutput(context, node, kLstmOutputTensor); - - // Copy out the LSTM specific params so they can be passed in the function. - TfLiteLSTMParams lstm_params; - lstm_params.activation = params->activation; - lstm_params.cell_clip = params->cell_clip; - lstm_params.proj_clip = params->proj_clip; - lstm_params.asymmetric_quantize_inputs = params->asymmetric_quantize_inputs; - - switch (input_to_output_weights->type) { - case kTfLiteFloat32: { - // Index the scratch buffers pointers to the global scratch buffer. 
- return EvalFloatLstm( - input, input_to_input_weights, input_to_forget_weights, - input_to_cell_weights, input_to_output_weights, - recurrent_to_input_weights, recurrent_to_forget_weights, - recurrent_to_cell_weights, recurrent_to_output_weights, - cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights, - input_layer_norm_coefficients, forget_layer_norm_coefficients, - cell_layer_norm_coefficients, output_layer_norm_coefficients, - /*aux_input=*/nullptr, - /*aux_input_to_input_weights=*/nullptr, - /*aux_input_to_forget_weights=*/nullptr, - /*aux_input_to_cell_weights=*/nullptr, - /*aux_input_to_output_weights=*/nullptr, input_gate_bias, - forget_gate_bias, cell_gate_bias, output_gate_bias, - projection_weights, projection_bias, &lstm_params, - /*forward_sequence=*/true, time_major, - /*output_offset=*/0, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kPrimaryScratchBuffer])), - output_state, cell_state, output); - } break; - case kTfLiteUInt8: - case kTfLiteInt8: { - const bool is_hybrid = input->type == kTfLiteFloat32; - if (is_hybrid) { - // Index the scratch buffers pointers to the global scratch buffer. 
- UnidirectionalSequenceLstmOpData* op_data_rw = - reinterpret_cast( - node->user_data); - return EvalHybridLstm( - &(op_data->hybrid_lstm_scales), input, input_to_input_weights, - /*input_to_input_weights_ledger*/ nullptr, input_to_forget_weights, - /*input_to_forget_weights_ledger*/ nullptr, input_to_cell_weights, - /*input_to_cell_weights_ledger*/ nullptr, input_to_output_weights, - /*input_to_output_weights_ledger*/ nullptr, - recurrent_to_input_weights, - /*recurrent_to_input_weights_ledger*/ nullptr, - recurrent_to_forget_weights, - /*recurrent_to_forget_weights_ledger*/ nullptr, - recurrent_to_cell_weights, - /*recurrent_to_cell_weights_ledger*/ nullptr, - recurrent_to_output_weights, - /*recurrent_to_output_weights_ledger*/ nullptr, - cell_to_input_weights, cell_to_forget_weights, - cell_to_output_weights, input_layer_norm_coefficients, - forget_layer_norm_coefficients, cell_layer_norm_coefficients, - output_layer_norm_coefficients, - /*aux_input=*/nullptr, - /*aux_input_to_input_weights=*/nullptr, - /*aux_input_to_forget_weights=*/nullptr, - /*aux_input_to_cell_weights=*/nullptr, - /*aux_input_to_output_weights=*/nullptr, input_gate_bias, - forget_gate_bias, cell_gate_bias, output_gate_bias, - projection_weights, /*projection_weights_ledger*/ nullptr, - projection_bias, &lstm_params, - /*forward_sequence=*/true, time_major, - /*output_offset=*/0, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kPrimaryScratchBuffer])), - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kInputScalingFactors])), - /*aux_input_sf=*/nullptr, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kOutputStateScalingFactors])), - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kProductScalingFactors])), - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kRecoveredCellWeights])), - reinterpret_cast(context->GetScratchBuffer( - context, 
op_data->scratch_index[kInputQuantized])), - /*aux_input_quantized=*/nullptr, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kOutputStateQuantized])), - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kCellStateQuantized])), - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kScales])), - output_state, cell_state, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kAccumScratch])), - output, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kInputZeroPoints])), - /*aux_input_zp=*/nullptr, - reinterpret_cast(context->GetScratchBuffer( - context, op_data->scratch_index[kOutputStateZeroPoints])), - op_data_rw->row_sums, op_data_rw->row_sums_size, - &op_data_rw->compute_row_sums); - } else { - return EvalInteger8x8_16Lstm( - input, input_to_input_weights, input_to_forget_weights, - input_to_cell_weights, input_to_output_weights, - recurrent_to_input_weights, recurrent_to_forget_weights, - recurrent_to_cell_weights, recurrent_to_output_weights, - cell_to_input_weights, cell_to_forget_weights, - cell_to_output_weights, input_layer_norm_coefficients, - forget_layer_norm_coefficients, cell_layer_norm_coefficients, - output_layer_norm_coefficients, input_gate_bias, forget_gate_bias, - cell_gate_bias, output_gate_bias, projection_weights, - projection_bias, &lstm_params, /*forward_sequence=*/true, - time_major, &op_data->integer_lstm_param, - op_data->output_state_zero_point, output_state, cell_state, output, - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[0])), - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[1])), - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[2])), - reinterpret_cast( - context->GetScratchBuffer(context, op_data->scratch_index[3])), - reinterpret_cast( - context->GetScratchBuffer(context, 
op_data->scratch_index[4])), - nullptr); - } - } break; - default: - MicroPrintf("Type %s is not currently supported.", - TfLiteTypeGetName(input_to_output_weights->type)); - return kTfLiteError; - } -} - -} // namespace - -TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM() { - return tflite::micro::RegisterOp(UnidirectionalSequenceLstmInit, - UnidirectionalSequenceLstmPrepare, - UnidirectionalSequenceLstmEval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/var_handle.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/var_handle.cc deleted file mode 100644 index db044f3f..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/var_handle.cc +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_graph.h" -#include "tensorflow/lite/micro/micro_resource_variable.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -namespace { - -struct OpData { - int32_t resource_id; -}; - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpData)); -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - OpData* op_data = reinterpret_cast(node->user_data); - const auto* params = - reinterpret_cast(node->builtin_data); - - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - MicroGraph& graph_info = micro_context->graph(); - - MicroResourceVariables* resources = graph_info.GetResourceVariables(); - if (resources == nullptr) { - MicroPrintf( - "VAR_HANDLE requires resource variables. Please create " - "ResourceVariables and pass it to the interpreter."); - return kTfLiteError; - } - op_data->resource_id = - resources->CreateIdIfNoneFound(params->container, params->shared_name); - if (op_data->resource_id < 0) { - return kTfLiteError; - } - - TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - TFLITE_DCHECK(output != nullptr); - - // Assign saved resource_id so this output tensor will always return the - // correct resource id. 
- output->data.i32 = &op_data->resource_id; - - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - OpData* op_data = reinterpret_cast(node->user_data); - - TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - TFLITE_DCHECK(output != nullptr); - - // Assign saved resource_id so this output tensor will always return the - // correct resource id. - output->data.i32 = &op_data->resource_id; - return kTfLiteOk; -} - -} // namespace. - -TfLiteRegistration Register_VAR_HANDLE() { - return tflite::micro::RegisterOp(Init, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/kernels/while.cc b/code/components/tflite-lib/tensorflow/lite/micro/kernels/while.cc deleted file mode 100644 index 811c9eae..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/kernels/while.cc +++ /dev/null @@ -1,133 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include - -#include "tensorflow/lite/c/builtin_op_data.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_context.h" -#include "tensorflow/lite/micro/micro_graph.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { - -namespace { - -struct OpData { - int cond_subgraph_index; - int body_subgraph_index; -}; - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(OpData)); -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - OpData* op_data = reinterpret_cast(node->user_data); - const auto* params = - reinterpret_cast(node->builtin_data); - - op_data->cond_subgraph_index = params->cond_subgraph_index; - op_data->body_subgraph_index = params->body_subgraph_index; - - // The first input is the condition. 
- tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - - size_t num_inputs = node->inputs->size; - size_t num_outputs = node->outputs->size; - - MicroGraph& graph_info = micro_context->graph(); - - TF_LITE_ENSURE(context, - op_data->cond_subgraph_index < graph_info.NumSubgraphs()); - TF_LITE_ENSURE(context, - op_data->body_subgraph_index < graph_info.NumSubgraphs()); - - TF_LITE_ENSURE_EQ(context, num_inputs, - graph_info.NumSubgraphInputs(op_data->cond_subgraph_index)); - TF_LITE_ENSURE_EQ(context, num_inputs, - graph_info.NumSubgraphInputs(op_data->body_subgraph_index)); - TF_LITE_ENSURE_EQ(context, num_inputs, num_outputs); - TF_LITE_ENSURE_EQ( - context, num_outputs, - graph_info.NumSubgraphOutputs(op_data->body_subgraph_index)); - - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const OpData* op_data = reinterpret_cast(node->user_data); - - tflite::MicroContext* micro_context = tflite::GetMicroContext(context); - MicroGraph* graph_info = µ_context->graph(); - - TF_LITE_ENSURE_OK(context, - tflite::micro::CopyOpInputsToSubgraphInputs( - context, node, graph_info, op_data->cond_subgraph_index, - /*first_tensor_idx=*/0)); - - TF_LITE_ENSURE_OK(context, - graph_info->InvokeSubgraph(op_data->cond_subgraph_index)); - - TfLiteEvalTensor* cond_subgraph_output = graph_info->GetSubgraphOutput( - op_data->cond_subgraph_index, /*tensor_idx=*/0); - bool cond_value = cond_subgraph_output->data.b[0]; - - TF_LITE_ENSURE_OK(context, - tflite::micro::CopyOpInputsToSubgraphInputs( - context, node, graph_info, op_data->body_subgraph_index, - /*first_tensor_idx=*/0)); - TF_LITE_ENSURE_OK(context, - tflite::micro::CopyOpInputsToOpOutputs(context, node)); - - while (cond_value == true) { - // Copy output of this iteration back to the body input. 
- TF_LITE_ENSURE_OK( - context, tflite::micro::CopyOpOutputsToSubgraphInputs( - context, node, graph_info, op_data->body_subgraph_index)); - TF_LITE_ENSURE_OK(context, - graph_info->InvokeSubgraph(op_data->body_subgraph_index)); - - TF_LITE_ENSURE_OK( - context, tflite::micro::CopySubgraphOutputsToOpOutputs( - context, node, graph_info, op_data->body_subgraph_index)); - TF_LITE_ENSURE_OK( - context, tflite::micro::CopyOpOutputsToSubgraphInputs( - context, node, graph_info, op_data->cond_subgraph_index)); - TF_LITE_ENSURE_OK(context, - graph_info->InvokeSubgraph(op_data->cond_subgraph_index)); - - cond_subgraph_output = graph_info->GetSubgraphOutput( - op_data->cond_subgraph_index, /*tensor_idx=*/0); - cond_value = cond_subgraph_output->data.b[0]; - } - - return kTfLiteOk; -} - -} // namespace. - -TfLiteRegistration Register_WHILE() { - return tflite::micro::RegisterOp(Init, Prepare, Eval); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/memory_helpers.cc b/code/components/tflite-lib/tensorflow/lite/micro/memory_helpers.cc deleted file mode 100644 index 930202de..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/memory_helpers.cc +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/memory_helpers.h" - -#include -#include - -#include "flatbuffers/flatbuffers.h" // from @flatbuffers -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/core/api/flatbuffer_conversions.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -uint8_t* AlignPointerUp(uint8_t* data, size_t alignment) { - std::uintptr_t data_as_uintptr_t = reinterpret_cast(data); - uint8_t* aligned_result = reinterpret_cast( - ((data_as_uintptr_t + (alignment - 1)) / alignment) * alignment); - return aligned_result; -} - -uint8_t* AlignPointerDown(uint8_t* data, size_t alignment) { - std::uintptr_t data_as_uintptr_t = reinterpret_cast(data); - uint8_t* aligned_result = - reinterpret_cast((data_as_uintptr_t / alignment) * alignment); - return aligned_result; -} - -size_t AlignSizeUp(size_t size, size_t alignment) { - size_t aligned_size = (((size + (alignment - 1)) / alignment) * alignment); - return aligned_size; -} - -TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) { - switch (type) { - case kTfLiteFloat16: - *size = sizeof(int16_t); - break; - case kTfLiteFloat32: - *size = sizeof(float); - break; - case kTfLiteFloat64: - *size = sizeof(double); - break; - case kTfLiteInt16: - *size = sizeof(int16_t); - break; - case kTfLiteInt32: - *size = sizeof(int32_t); - break; - case kTfLiteUInt32: - *size = sizeof(uint32_t); - break; - case kTfLiteUInt8: - *size = sizeof(uint8_t); - break; - case kTfLiteInt8: - *size = sizeof(int8_t); - break; - case kTfLiteInt64: - *size = sizeof(int64_t); - break; - case kTfLiteUInt64: - *size = sizeof(uint64_t); - break; - case kTfLiteBool: - *size = sizeof(bool); - break; - case kTfLiteResource: - *size = sizeof(int32_t); - break; - case kTfLiteComplex64: - *size = 
sizeof(float) * 2; - break; - case kTfLiteComplex128: - *size = sizeof(double) * 2; - break; - default: - return kTfLiteError; - } - return kTfLiteOk; -} - -TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor, - size_t* bytes, size_t* type_size, - ErrorReporter* error_reporter) { - int element_count = 1; - // If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar - // so has 1 element. - if (flatbuffer_tensor.shape() != nullptr) { - for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) { - element_count *= flatbuffer_tensor.shape()->Get(n); - } - } - - TfLiteType tf_lite_type; - TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(), - &tf_lite_type, error_reporter)); - TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size)); - *bytes = element_count * (*type_size); - return kTfLiteOk; -} - -TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor, - size_t* out_bytes) { - TFLITE_DCHECK(out_bytes != nullptr); - - int element_count = 1; - // If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element. - if (eval_tensor->dims != nullptr) { - for (int n = 0; n < eval_tensor->dims->size; ++n) { - element_count *= eval_tensor->dims->data[n]; - } - } - size_t type_size; - TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size)); - *out_bytes = element_count * type_size; - return kTfLiteOk; -} - -TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteTensor* output) { - const TfLiteTensor* input = nullptr; - - TF_LITE_ENSURE(context, input1->dims != nullptr); - TF_LITE_ENSURE(context, input2->dims != nullptr); - TF_LITE_ENSURE(context, output->dims->size == 0); - - input = input1->dims->size > input2->dims->size ? 
input1 : input2; - TF_LITE_ENSURE(context, output->type == input->type); - - size_t size = 0; - TfLiteTypeSizeOf(input->type, &size); - const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount(); - for (int i = 0; i < dimensions_count; i++) { - size *= input->dims->data[i]; - } - - output->bytes = size; - - output->dims = - reinterpret_cast(context->AllocatePersistentBuffer( - context, TfLiteIntArrayGetSizeInBytes(size))); - - output->dims->size = input->dims->size; - for (int i = 0; i < dimensions_count; i++) { - output->dims->data[i] = input->dims->data[i]; - } - - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/memory_helpers.h b/code/components/tflite-lib/tensorflow/lite/micro/memory_helpers.h deleted file mode 100644 index 8f5526ce..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/memory_helpers.h +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_ -#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_ - -#include -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -// Returns the next pointer address aligned to the given alignment. 
-uint8_t* AlignPointerUp(uint8_t* data, size_t alignment); - -// Returns the previous pointer address aligned to the given alignment. -uint8_t* AlignPointerDown(uint8_t* data, size_t alignment); - -// Returns an increased size that's a multiple of alignment. -size_t AlignSizeUp(size_t size, size_t alignment); - -// Returns size in bytes for a given TfLiteType. -TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size); - -// How many bytes are needed to hold a tensor's contents. -TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor, - size_t* bytes, size_t* type_size, - ErrorReporter* error_reporter); - -// How many bytes are used in a TfLiteEvalTensor instance. The byte length is -// returned in out_bytes. -TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor, - size_t* out_bytes); - -// Deduce output dimensions from input and allocate given size. -// Useful for operators with two inputs where the largest input should equal the -// output dimension. -TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteTensor* output); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc b/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc deleted file mode 100644 index bdfc8304..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc +++ /dev/null @@ -1,452 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
// Sorts `values` into descending order in place and applies exactly the same
// permutation to `ids`, so that ids[k] keeps identifying values[k].
//
// The sort is stable: entries with equal values keep their original relative
// order, because neighbours are exchanged only when the left one is strictly
// smaller. It is a plain bubble sort (quadratic time), which is acceptable
// for the small arrays the planner works with.
//
// Deliberately not in an anonymous namespace so tests can call it directly.
void ReverseSortInPlace(int* values, int* ids, int size) {
  for (bool swapped_this_pass = true; swapped_this_pass;) {
    swapped_this_pass = false;
    for (int right = 1; right < size; ++right) {
      const int left = right - 1;
      if (values[left] >= values[right]) {
        // Already in descending (or equal) order; leave the pair alone.
        continue;
      }
      const int larger_value = values[right];
      values[right] = values[left];
      values[left] = larger_value;

      const int larger_id = ids[right];
      ids[right] = ids[left];
      ids[left] = larger_id;

      swapped_this_pass = true;
    }
  }
}
- max_buffer_count_ = scratch_buffer_size / per_buffer_size(); - - unsigned char* next_free = scratch_buffer; - requirements_ = reinterpret_cast(next_free); - next_free += sizeof(BufferRequirements) * max_buffer_count_; - - buffer_sizes_sorted_ = reinterpret_cast(next_free); - next_free += sizeof(int) * max_buffer_count_; - - buffer_ids_sorted_ = reinterpret_cast(next_free); - next_free += sizeof(int) * max_buffer_count_; - - buffers_sorted_by_offset_ = reinterpret_cast(next_free); - next_free += sizeof(ListEntry) * max_buffer_count_; - - buffer_offsets_ = reinterpret_cast(next_free); - return kTfLiteOk; -} - -GreedyMemoryPlanner::~GreedyMemoryPlanner() { - // We don't own the scratch buffer, so don't deallocate anything. -} - -TfLiteStatus GreedyMemoryPlanner::AddBuffer( - tflite::ErrorReporter* error_reporter, int size, int first_time_used, - int last_time_used) { - if (buffer_count_ >= max_buffer_count_) { - TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)", - max_buffer_count_); - return kTfLiteError; - } - BufferRequirements* current = &requirements_[buffer_count_]; - current->size = size; - current->first_time_used = first_time_used; - current->last_time_used = last_time_used; - current->offline_offset = kOnlinePlannedBuffer; - ++buffer_count_; - need_to_calculate_offsets_ = true; - return kTfLiteOk; -} - -TfLiteStatus GreedyMemoryPlanner::AddBuffer( - tflite::ErrorReporter* error_reporter, int size, int first_time_used, - int last_time_used, int offline_offset) { - BufferRequirements* current = &requirements_[buffer_count_]; - if (AddBuffer(error_reporter, size, first_time_used, last_time_used) != - kTfLiteOk) { - return kTfLiteError; - } - current->offline_offset = offline_offset; - return kTfLiteOk; -} - -bool GreedyMemoryPlanner::DoesEntryOverlapInTime( - const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used, - const int last_time_used) const { - const BufferRequirements* entry_requirements = - 
&requirements_[entry->requirements_index]; - if (entry_requirements->first_time_used > last_time_used) { - return false; - } - if (first_time_used > entry_requirements->last_time_used) { - return false; - } - return true; -} - -GreedyMemoryPlanner::ListEntry* -GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer( - const GreedyMemoryPlanner::ListEntry* start, const int first_time_used, - const int last_time_used) { - ListEntry* result = nullptr; - ListEntry* candidate_next_entry; - if (start == nullptr) { - candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_]; - } else { - if (start->next_entry_index == -1) { - return nullptr; - } - candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index]; - } - do { - if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used, - last_time_used)) { - result = candidate_next_entry; - break; - } - if (candidate_next_entry->next_entry_index == -1) { - break; - } - candidate_next_entry = - &buffers_sorted_by_offset_[candidate_next_entry->next_entry_index]; - } while (true); - return result; -} - -void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() { - if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) { - return; - } - need_to_calculate_offsets_ = false; - - // Start off by ordering the buffers in descending order of size. - // This helps find a more compact layout. Intuitively, you can think - // about putting the large buffers in place first, and then the - // smaller buffers can fit in the gaps, rather than fragmenting the - // gaps with small buffers at the beginning. Add offline planned offsets - // first in the list, since they have a predetermined offset. 
- int idx_from_tail = buffer_count_; - int idx_from_head = 0; - for (int i = 0; i < buffer_count_; ++i) { - if (requirements_[i].offline_offset == kOnlinePlannedBuffer) { - idx_from_tail--; - buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size; - buffer_ids_sorted_[idx_from_tail] = i; - buffer_offsets_[i] = -1; - } else { - buffer_sizes_sorted_[idx_from_head] = requirements_[i].size; - buffer_ids_sorted_[idx_from_head] = i; - buffer_offsets_[i] = requirements_[i].offline_offset; - idx_from_head++; - } - } - - // This sorting algorithm is naive, and may end up taking a very long time - // with hundreds of buffers. Do not sort the offline planned offsets. - ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head], - &buffer_ids_sorted_[idx_from_head], - buffer_count_ - idx_from_head); - - // Initialize the first entry to the first buffer in - // buffer_ids_sorted_. - // - If there are no offline planned offsets, the largest buffer will be - // first, and the buffers will be handled in size order. - // - If offline offsets are present, these will be handled first in order - // for the greedy algorithm to utilized gaps in the offline plan. - first_entry_index_ = 0; - next_free_entry_ = 1; - ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_]; - first_entry->next_entry_index = -1; // to mark the entry as end of list - int buffer_id = buffer_ids_sorted_[0]; - first_entry->requirements_index = buffer_id; - if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) { - buffer_offsets_[buffer_id] = 0; - } - first_entry->offset = buffer_offsets_[buffer_id]; - - // Work through the rest of the buffers to find a good gap to place each one. - for (int i = 1; i < buffer_count_; ++i) { - // The id is the order the buffer was originally added by the client. - buffer_id = buffer_ids_sorted_[i]; - // Look at what size and time range the buffer needs to be active. 
- BufferRequirements* wanted_requirements = &requirements_[buffer_id]; - const int wanted_size = wanted_requirements->size; - const int wanted_first_time_used = wanted_requirements->first_time_used; - const int wanted_last_time_used = wanted_requirements->last_time_used; - - // Find the first buffer that's active in our time range. All placed - // buffers are stored in the order of their starting position in the arena - // so that it's easy to find the next buffer in memory, and so the gap. - // The candidate_entry variable holds the buffer that we're considering - // placing the current buffer after. - - int candidate_offset = 0; - // Loop through the offset-ordered list of buffers, looking for gaps. - if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) { - ListEntry* prior_entry = nullptr; - while (true) { - // Find out what the next active buffer is. - ListEntry* next_entry = NextSimultaneouslyActiveBuffer( - prior_entry, wanted_first_time_used, wanted_last_time_used); - - if (prior_entry) { - BufferRequirements* candidate_requirements = - &requirements_[prior_entry->requirements_index]; - const int prior_entry_offset = - prior_entry->offset + candidate_requirements->size; - if (prior_entry_offset > candidate_offset) { - candidate_offset = prior_entry_offset; - } - } - if (next_entry == nullptr) { - // We're at the end of the list, so we can always append the buffer - // here. - break; - } - // Find out how much space there is between us and the next buffer. - const int gap = next_entry->offset - candidate_offset; - if (gap >= wanted_size) { - // This entry has a big enough gap between it and the next, so - // use it! - break; - } - // The gap wasn't big enough, so move on to another candidate. 
- prior_entry = next_entry; - } - } else { - // Offline planned offset are to be considered constant - candidate_offset = wanted_requirements->offline_offset; - } - // At this point, we've either found a gap (possibly at the end of the - // list) and want to place the buffer there, or there are no other active - // buffers in this time range and so we can put it at offset zero. - // Record the buffer's offset in our plan. - buffer_offsets_[buffer_id] = candidate_offset; - // Add the newly-placed buffer to our offset-ordered list, so that - // subsequent passes can fit in their buffers around it. - ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_]; - new_entry->offset = candidate_offset; - new_entry->requirements_index = buffer_id; - const int new_entry_index = next_free_entry_; - ++next_free_entry_; - - if (first_entry->offset > candidate_offset) { - // The new entry offset is smaller than the first entry offset => - // replace the first entry - first_entry = new_entry; - first_entry->next_entry_index = first_entry_index_; - first_entry_index_ = new_entry_index; - } else { - ListEntry* current_entry = first_entry; - // Make sure that we insert the buffer at the correct place in the - // buffer-offset-ordered list - while (true) { - const int next_entry_index = current_entry->next_entry_index; - if (next_entry_index == -1) { - // We're at the end of the list, so just add the new entry here. - current_entry->next_entry_index = new_entry_index; - new_entry->next_entry_index = -1; - break; - } - // not at the end of the list -> take a look at next entry - ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index]; - if (next_entry->offset > candidate_offset) { - // We're at the right spot to do an insertion and retain the sorting - // order, so place the new entry here. 
- new_entry->next_entry_index = current_entry->next_entry_index; - current_entry->next_entry_index = new_entry_index; - break; - } - current_entry = next_entry; - } - } - } -} - -size_t GreedyMemoryPlanner::GetMaximumMemorySize() { - CalculateOffsetsIfNeeded(); - if (buffer_count_ == 0) { - return 0; - } - ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_]; - size_t max_size = 0; - while (entry) { - BufferRequirements* requirements = - &requirements_[entry->requirements_index]; - const size_t current_size = entry->offset + requirements->size; - if (current_size > max_size) { - max_size = current_size; - } - if (entry->next_entry_index == -1) { - break; - } - entry = &buffers_sorted_by_offset_[entry->next_entry_index]; - } - return max_size; -} - -void GreedyMemoryPlanner::PrintMemoryPlan() { - CalculateOffsetsIfNeeded(); - - for (int i = 0; i < buffer_count_; ++i) { - MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d", - GetOrdinalCharacter(i), i, requirements_[i].size, - buffer_offsets_[i], requirements_[i].first_time_used, - requirements_[i].last_time_used); - } - - constexpr int kLineWidth = 80; - int max_size = kLineWidth; - int max_time = 0; - for (int i = 0; i < buffer_count_; ++i) { - BufferRequirements* requirements = &requirements_[i]; - const int offset = buffer_offsets_[i]; - const int last_time_used = requirements->last_time_used; - const int size = offset + requirements->size; - if (size > max_size) { - max_size = size; - } - if (last_time_used > max_time) { - max_time = last_time_used; - } - } - - char line[kLineWidth + 1]; - for (int t = 0; t <= max_time; ++t) { - for (int c = 0; c < kLineWidth; ++c) { - line[c] = '.'; - } - int memory_use = 0; - for (int i = 0; i < buffer_count_; ++i) { - BufferRequirements* requirements = &requirements_[i]; - if ((t < requirements->first_time_used) || - (t > requirements->last_time_used)) { - continue; - } - const int offset = buffer_offsets_[i]; - if (offset == -1) { - 
continue; - } - const int size = requirements->size; - memory_use += size; - const int line_start = (offset * kLineWidth) / max_size; - const int line_end = ((offset + size) * kLineWidth) / max_size; - for (int n = line_start; n < line_end; ++n) { - if (line[n] == '.') { - line[n] = GetOrdinalCharacter(i); - } else { - line[n] = '!'; - } - } - } - line[kLineWidth] = 0; - - MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line, - (memory_use + 1023) / 1024); - } -} - -int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; } - -TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer( - tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) { - CalculateOffsetsIfNeeded(); - if ((buffer_index < 0) || (buffer_index >= buffer_count_)) { - TF_LITE_REPORT_ERROR(error_reporter, - "buffer index %d is outside range 0 to %d", - buffer_index, buffer_count_); - return kTfLiteError; - } - *offset = buffer_offsets_[buffer_index]; - return kTfLiteOk; -} - -bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) { - CalculateOffsetsIfNeeded(); - bool were_overlaps_found = false; - for (int i = 0; i < buffer_count_; ++i) { - BufferRequirements* a_requirements = &requirements_[i]; - const int a_start_offset = buffer_offsets_[i]; - const int a_first_time_used = a_requirements->first_time_used; - const int a_last_time_used = a_requirements->last_time_used; - const int a_end_offset = a_start_offset + a_requirements->size; - for (int j = 0; j < buffer_count_; ++j) { - if (i == j) { - continue; - } - BufferRequirements* b_requirements = &requirements_[j]; - const int b_start_offset = buffer_offsets_[j]; - const int b_first_time_used = b_requirements->first_time_used; - const int b_last_time_used = b_requirements->last_time_used; - const int b_end_offset = b_start_offset + b_requirements->size; - if ((a_first_time_used > b_last_time_used) || - (b_first_time_used > a_last_time_used)) { - // Buffers don't overlap in time. 
- continue; - } - if ((a_start_offset >= b_end_offset) || - (b_start_offset >= a_end_offset)) { - // No overlap in memory. - continue; - } - were_overlaps_found = true; - TF_LITE_REPORT_ERROR( - error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)", - i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset, - j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset); - } - } - return were_overlaps_found; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h b/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h deleted file mode 100644 index a34f3c59..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +++ /dev/null @@ -1,167 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_ -#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_ - -#include "tensorflow/lite/micro/compatibility.h" -#include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h" - -namespace tflite { - -constexpr int kOnlinePlannedBuffer = -1; - -// A memory planner that uses a greedy algorithm to arrange buffers in memory -// to minimize the overall arena size needed. 
-// -// The algorithm works like this: -// - The client enters the buffer information through AddBuffer(). -// - When a function like GetOffsetForBuffer() is called, the -// CalculateOffsetsIfNeeded() method is invoked. -// - If an up to date plan is not already present, one will be calculated. -// - The buffers are sorted in descending order of size. -// - The largest buffer is placed at offset zero. -// - The rest of the buffers are looped through in descending size order. -// - The other buffers that need to be in memory at the same time are found. -// - The first gap between simultaneously active buffers that the current -// buffer fits into will be used. -// - If no large-enough gap is found, the current buffer is placed after the -// last buffer that's simultaneously active. -// - This continues until all buffers are placed, and the offsets stored. -// -// This is not guaranteed to produce the best placement, since that's an -// NP-Complete problem, but in practice it should produce one that's decent. -class GreedyMemoryPlanner : public MicroMemoryPlanner { - public: - GreedyMemoryPlanner(); - ~GreedyMemoryPlanner() override; - - // You need to pass in an area of memory to be used for planning. The client - // should ensure the validity of the memory when it needs to use this object. - // This memory isn't owned by this object, so management should be handled by - // the client. This is so it can be stack or globally allocated if necessary - // on devices without dynamic memory allocation. How many buffers can be - // planned for will depend on the size of this scratch memory, so you should - // enlarge it if you see an error when calling AddBuffer(). The memory can be - // reused once you're done with the planner, as long as you copy the - // calculated offsets to another location. Each buffer requires about 36 bytes - // of scratch. 
- TfLiteStatus Init(unsigned char* scratch_buffer, - int scratch_buffer_size) override; - - // Record details of a buffer we want to place. - TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size, - int first_time_used, int last_time_used) override; - - // Record details of an offline planned buffer offset we want to place. - // offline_offset is the buffer offset from the start of the arena. - TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size, - int first_time_used, int last_time_used, - int offline_offset) override; - - // Returns the high-water mark of used memory. This is the minimum size of a - // memory arena you'd need to allocate to hold these buffers. - size_t GetMaximumMemorySize() override; - - // How many buffers have been recorded. - int GetBufferCount() override; - - // Where a given buffer should be placed in the memory arena. - // This information is stored in the memory arena itself, so once the arena - // is used for inference, it will be overwritten. - TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter, - int buffer_index, int* offset) override; - - // Prints an ascii-art diagram of the buffer layout plan. - void PrintMemoryPlan() override; - - // Debug method to check whether any buffer allocations are overlapping. This - // is an O(N^2) complexity operation, so only use for testing. - bool DoAnyBuffersOverlap(ErrorReporter* error_reporter); - - // Used to store a list of buffers ordered by their offset. - struct ListEntry { - int offset; - int requirements_index; - int next_entry_index; - }; - - // Number of bytes required in order to plan a buffer. 
- static size_t per_buffer_size() { - const int per_buffer_size = - sizeof(BufferRequirements) + // requirements_ - sizeof(int) + // buffer_sizes_sorted_ - sizeof(int) + // buffer_ids_sorted_ - sizeof(ListEntry) + // buffers_sorted_by_offset_ - sizeof(int); // buffer_offsets_; - return per_buffer_size; - } - - private: - // Whether a buffer is active in a given time range. - bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used, - const int last_time_used) const; - - // Walks the list to return the next buffer that is active in a given time - // range, or a null pointer if there are none. - ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start, - const int first_time_used, - const int last_time_used); - - // If there isn't an up to date plan, calculate a new one. - void CalculateOffsetsIfNeeded(); - - // How many buffers we can plan for, based on the arena size we're given in - // the constructor. - int max_buffer_count_; - - // The number of buffers added so far. - int buffer_count_; - - // Records the client-provided information about each buffer. - struct BufferRequirements { - int size; - int offline_offset; - int first_time_used; - int last_time_used; - }; - - // Working arrays used during the layout algorithm. - BufferRequirements* requirements_; - // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to: - // { - // offline planned buffers, - // online planned buffers sorted by size - // } - int* buffer_sizes_sorted_; - int* buffer_ids_sorted_; - ListEntry* buffers_sorted_by_offset_; - int next_free_entry_; // Index of the next free entry of - // buffers_sorted_by_offset_ - int first_entry_index_; // Index of the first entry (smallest offset) of - // buffers_sorted_by_offset_ - - // Stores the outcome of the plan, the location of each buffer in the arena. - int* buffer_offsets_; - - // Whether buffers have been added since the last plan was calculated. 
- bool need_to_calculate_offsets_; - - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/linear_memory_planner.cc b/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/linear_memory_planner.cc deleted file mode 100644 index d25a4f22..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/linear_memory_planner.cc +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/memory_planner/linear_memory_planner.h" - -namespace tflite { - -LinearMemoryPlanner::LinearMemoryPlanner() - : current_buffer_count_(0), next_free_offset_(0) {} -LinearMemoryPlanner::~LinearMemoryPlanner() {} - -TfLiteStatus LinearMemoryPlanner::AddBuffer( - tflite::ErrorReporter* error_reporter, int size, int first_time_used, - int last_time_used) { - if (current_buffer_count_ >= kMaxBufferCount) { - TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)", - kMaxBufferCount); - return kTfLiteError; - } - buffer_offsets_[current_buffer_count_] = next_free_offset_; - next_free_offset_ += size; - ++current_buffer_count_; - return kTfLiteOk; -} - -size_t LinearMemoryPlanner::GetMaximumMemorySize() { return next_free_offset_; } - -int LinearMemoryPlanner::GetBufferCount() { return current_buffer_count_; } - -TfLiteStatus LinearMemoryPlanner::GetOffsetForBuffer( - tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) { - if ((buffer_index < 0) || (buffer_index >= current_buffer_count_)) { - TF_LITE_REPORT_ERROR(error_reporter, - "buffer index %d is outside range 0 to %d", - buffer_index, current_buffer_count_); - return kTfLiteError; - } - *offset = buffer_offsets_[buffer_index]; - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/linear_memory_planner.h b/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/linear_memory_planner.h deleted file mode 100644 index 128ef808..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/linear_memory_planner.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
// The simplest possible memory planner that just lays out all buffers at
// increasing offsets without trying to reuse memory.
//
// Offsets are kept in a fixed-size member array, so at most kMaxBufferCount
// buffers can be recorded.
class LinearMemoryPlanner : public MicroMemoryPlanner {
 public:
  LinearMemoryPlanner();
  ~LinearMemoryPlanner() override;

  // Records a buffer of `size` bytes at the next free offset. Returns
  // kTfLiteError once kMaxBufferCount buffers have already been added.
  TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used) override;

  // Total bytes needed for the layout: the offset just past the last buffer.
  size_t GetMaximumMemorySize() override;
  // How many buffers have been added so far.
  int GetBufferCount() override;
  // Stores the offset of the `buffer_index`-th added buffer in `*offset`;
  // returns kTfLiteError when the index is out of range.
  TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
                                  int buffer_index, int* offset) override;

 private:
  // Capacity of buffer_offsets_.
  static constexpr int kMaxBufferCount = 1024;
  // Offset assigned to each buffer, indexed in the order they were added.
  size_t buffer_offsets_[kMaxBufferCount];
  // Number of valid entries in buffer_offsets_.
  int current_buffer_count_;
  // Offset at which the next added buffer will be placed.
  size_t next_free_offset_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};
namespace tflite {

// This is an experimental feature and is subject to change.
// More description is available at
// tensorflow/lite/micro/docs/offline_memory_plan.md.

// Layout of a single buffer inside an arena. Kept deliberately tiny and
// limited to primitive fields so that it is cheap to store and easy to edit
// offline.
struct BufferDescriptor {
  // Starting offset of this buffer inside the arena.
  // The offset is the minimum information needed for the buffer: the user
  // knows the model and the size of each buffer, and is responsible for
  // laying out a valid plan.
  int32_t offset;
};

// Layout of all buffers inside an arena.
struct BufferPlan {
  // Number of buffers described in this plan.
  int32_t buffer_count;

  // One element per buffer.
  // The buffer index is implied by the order of the AddBuffer() calls.
  // Specifically, indices of activation tensors are 0 … N-1 where N is the
  // number of activation tensors.
  // The rest are based on the order of OP requests.
  //
  // Conceptually this is a flexible array member (buffer_plan_entries[]).
  // To stay portable across compilers without resorting to ifdefs, it is
  // declared as an array of length 1 placed last in the struct. Whenever the
  // size of a BufferPlan is needed, use SizeOfBufferPlan(buffer_count), which
  // accounts for this implementation caveat.
  BufferDescriptor buffer_plan_entries[1];
};

// Returns the size in bytes of a BufferPlan holding `buffer_count` entries.
// The result is a compile-time constant when buffer_count is one.
constexpr size_t SizeOfBufferPlan(int32_t buffer_count) {
  // sizeof(BufferPlan) already includes one BufferDescriptor, so only the
  // entries beyond the first add to the size. Counts of one or less
  // (including the buffer_count == 0 corner case) add nothing.
  return sizeof(BufferPlan) +
         sizeof(BufferDescriptor) * (buffer_count > 1 ? buffer_count - 1 : 0);
}

}  // namespace tflite
// Interface class for planning the layout of memory buffers during the
// execution of a graph.
// It's designed to be used by a client that iterates in any order through the
// buffers it wants to lay out, and then calls the getter functions for
// information about the calculated layout. For example:
//
// SomeMemoryPlanner planner;
// planner.AddBuffer(reporter, 100, 0, 1);  // Buffer 0
// planner.AddBuffer(reporter, 50, 2, 3);   // Buffer 1
// planner.AddBuffer(reporter, 50, 2, 3);   // Buffer 2
//
// int offset0;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 0, &offset0));
// int offset1;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 1, &offset1));
// int offset2;
// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 2, &offset2));
// const int arena_size_needed = planner.GetMaximumMemorySize();
//
// The goal is for applications to be able to experiment with different layout
// strategies without changing their client code, by swapping out classes that
// implement this interface.
class MicroMemoryPlanner {
 public:
  MicroMemoryPlanner() {}
  virtual ~MicroMemoryPlanner() {}

  // Pass information about a buffer's size and lifetime to the layout
  // algorithm. The order this is called implicitly assigns an index to the
  // result, so the buffer information that's passed into the N-th call of
  // this method will be used as the buffer_index argument to
  // GetOffsetForBuffer().
  virtual TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter,
                                 int size, int first_time_used,
                                 int last_time_used) = 0;

  // Record details of an offline planned buffer offset we want to place.
  // offline_offset is the buffer offset from the start of the arena.
  // This is to support offline memory planning from the flatbuffer metadata.
  // The default implementation ignores its arguments and returns
  // kTfLiteError; planners that support offline offsets override it.
  virtual TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                                 int first_time_used, int last_time_used,
                                 int offline_offset) {
    return kTfLiteError;
  }

  // The largest contiguous block of memory that's needed to hold the layout.
  virtual size_t GetMaximumMemorySize() = 0;
  // How many buffers have been added to the planner.
  virtual int GetBufferCount() = 0;
  // Calculated layout offset for the N-th buffer added to the planner.
  virtual TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
                                          int buffer_index, int* offset) = 0;

  // Provides the scratch buffer in case the memory planner needs one.
  // The scratch buffer's lifetime lasts until the static memory plan is
  // committed.
  // The default implementation is for memory planners that do not need a
  // scratch buffer: it ignores its arguments and simply returns kTfLiteOk.
  virtual TfLiteStatus Init(unsigned char* scratch_buffer,
                            int scratch_buffer_size) {
    return kTfLiteOk;
  }

  virtual void PrintMemoryPlan() {
    // Default does nothing.
  }
};
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h" - -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -NonPersistentMemoryPlannerShim::NonPersistentMemoryPlannerShim( - const BufferPlan* buffer_plan) - : buffer_plan_(buffer_plan), buffer_request_count_(0) {} - -NonPersistentMemoryPlannerShim::~NonPersistentMemoryPlannerShim() {} - -TfLiteStatus NonPersistentMemoryPlannerShim::AddBuffer( - tflite::ErrorReporter* error_reporter, int size, int first_time_used, - int last_time_used) { - buffer_request_count_++; - if (buffer_request_count_ > buffer_plan_->buffer_count) { - MicroPrintf( - "Attempting to add buffer %d, but only %d buffers in given buffer " - "plan.", - buffer_request_count_, buffer_plan_->buffer_count); - return kTfLiteError; - } - return kTfLiteOk; -} - -size_t NonPersistentMemoryPlannerShim::GetMaximumMemorySize() { - // Simply return 0 to let the framework accept this memory plan - // because the client ensure validity of the memory plan. - return 0; -} - -// How many buffers are in the given memory plan. 
-int NonPersistentMemoryPlannerShim::GetBufferCount() { - return buffer_plan_->buffer_count; -} - -TfLiteStatus NonPersistentMemoryPlannerShim::GetOffsetForBuffer( - ErrorReporter* error_reporter, int buffer_request_index, int* offset) { - if (buffer_request_index >= buffer_plan_->buffer_count) { - MicroPrintf( - "Attempting to get offset for buffer %d, but only %d buffers in given " - "buffer plan.", - buffer_request_index, buffer_plan_->buffer_count); - return kTfLiteError; - } - *offset = buffer_plan_->buffer_plan_entries[buffer_request_index].offset; - return kTfLiteOk; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h b/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h deleted file mode 100644 index 945ac123..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h +++ /dev/null @@ -1,130 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__ -#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__ - -#include "tensorflow/lite/micro/compatibility.h" -#include "tensorflow/lite/micro/memory_planner/memory_plan_struct.h" -#include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h" - -namespace tflite { - -/* This is an experimental feature and subjected to change. - * -The NonPersistentMemoryPlannerShim enables TFLM to work with an external tooling -that can plan the offset of each non persistent buffer for the Model within the -TFLM arena. - -If the NonPersistentMemoryPlannerShim is used, then the final binary does not -have any of the symbols associated with the GreedyMemoryPlanner which results in -a reduced memory footprint. - -Additionally, the offline planning of the non-persistent buffers can be used to -have a more efficient utilization compared to the GreedyMemoryPlanner. 
- -For example, consider the following hypothetical model: - -A1(400) A2(401) -──┬─────────┐ ┌─────────── - │ │ │ - │ │ │ - │ ▼ ▼ - │ ┌────────┐ - │ │ OP1 │ - │ └───┬────┘ A4(201) - │ A3(10) │ │ - │ │ │ - │ │ │ - │ ┌───┴────┐ │ - │ │ OP2 │◄────────┤ - │ └───┬────┘ │ - │ A5(11) │ A6(202) │ - │ │ │ │ - │ ▼ │ │ - │ ┌────────┐ │ │ - │ │ OP3 │◄─┘ │ - │ └───┬────┘ │ - │ │ A8(200) │ - │ A7(12) │ │ │ - │ │ │ │ - │ ┌───┴────┐◄──┘ │ - └──────►│ OP4 │ │ - └───┬────┘◄────────┘ - │ - A9(13) │ - ▼ - -The GreedyMemoryPlanner will give the following memory layout that requires 1012 -bytes of scratch arena size: - -┌─────────────────────────────────────────┬──────────────────────────┬────────┬───────┐ -│ A2(401) │ A1(400) │ A4(201)│ -A3(10)│ -└─────────────────────────────────────────┴──────────────────────────┴────────┴───────┘ - -┌───────────┬──────┬──────┐ -│ A6(202) │A5(11)│A7(12)│ -└───────────┴──────┴──────┘ - -┌──────────┬───────┐ -│ A8(200) │A9(13) │ -└──────────┴───────┘ - -But a more efficient offline memory plan that requires only 826 bytes of scratch -arena size can be - -┌──────────────────────────────────────┬─────────────────────────────┬───────┬──────┐ -│ A1(400) │ A2(401) │ -A3(10)│A5(11)│ -└──────────────────────────────────────┴─────────────────────────────┴───────┴──────┘ - - ┌────────────────┬────────────┬────────┬───────┐ - │A4(201) │ A8(200) │A9(13) -│A7(12) │ └────────────────┴────────────┴────────┴───────┘ - - ┌─────────────┐ - │ A6(202) │ - └─────────────┘ - -*/ -class NonPersistentMemoryPlannerShim : public MicroMemoryPlanner { - public: - // Does not take ownership of buffer_plan, which must refer to a valid - // BufferPlan that outlives this object. 
- explicit NonPersistentMemoryPlannerShim(const BufferPlan* buffer_plan); - ~NonPersistentMemoryPlannerShim() override; - - TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter, - int buffer_request_index, - int* offset) override; - - TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter, int size, - int first_time_used, int last_time_used) override; - size_t GetMaximumMemorySize() override; - int GetBufferCount() override; - - private: - const BufferPlan* buffer_plan_; // not owned, can't be null - - // The number of buffers requested so far. Used for error checking. - int buffer_request_count_; - - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_allocation_info.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_allocation_info.h deleted file mode 100644 index 4ea435b3..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_allocation_info.h +++ /dev/null @@ -1,152 +0,0 @@ -/* Copyright 2022 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/core/api/flatbuffer_conversions.h" -#include "tensorflow/lite/micro/compatibility.h" -#include "tensorflow/lite/micro/flatbuffer_utils.h" -#include "tensorflow/lite/micro/micro_allocator.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -// Used to hold information used during allocation calculations. -struct AllocationInfo { - size_t bytes; - void** output_ptr; - int first_created; - int last_used; - int32_t offline_offset; - bool needs_allocating; -}; - -// Used to hold the allocation info list and related metadata for the entire -// graph (including subgraphs). Since all subgraphs are planned together, the -// allocation info list contains allocations for all subgraphs. Track the offset -// into this list for each subgraph then reserve space to track all allocations. -// -// The AllocationInfo list is a contiguous list of allocations across all -// subgraphs and scratch buffers. Each element here is marked as -// st. The following is a possible -// AllocationInfo list: -// [s0t0, s0t1, s1t0, s2t1, s1t2, s3t0, s3t1, scratch0, scratch1, scratch2] -// -// For this example, the subgraph offsets would be [0, 2, 5] and the scratch -// offset would be 7. -struct GraphAllocationInfo { - AllocationInfo* allocation_info; - size_t allocation_info_count; - size_t* subgraph_offsets; - size_t scratch_offset; - size_t tensor_count; - size_t scratch_buffer_count; -}; - -// A helper class to construct AllocationInfo array. This array contains the -// lifetime of tensors / scratch_buffer and will be used to calculate the memory -// plan. Methods need to be called in order from `Create`, Init`, `Add*`, to -// `Finish`. 
-class AllocationInfoBuilder { - public: - AllocationInfoBuilder(const Model* model, - INonPersistentBufferAllocator* non_persistent_allocator, - ErrorReporter* reporter) - : model_(model), - non_persistent_allocator_(non_persistent_allocator) -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - , - reporter_(reporter) -#endif - { - } - - // Check if model contains offline planned buffer offsets. - // - If there's no metadata available, offline_planner_offsets is not set - // - If there's metadata available, offline_planner_offsets will point to the - // first offset in the metadata buffer list. - TfLiteStatus GetOfflinePlannedOffsets( - const int32_t** offline_planner_offsets); - - // Allocate memory for the allocation info array as well as offsets into that - // array for each subgraph. - TfLiteStatus CreateAllocationInfo(int scratch_buffer_request_count); - - // Release memory used for the allocation info array. - TfLiteStatus FreeAllocationInfo(); - - // Initialize AllocationInfo for all tensors and scratch buffers in the graph. - TfLiteStatus InitializeAllocationInfo(const int32_t* offline_offsets, - SubgraphAllocations* allocations); - - // Mark the scope of each tensor and scratch buffer across the graph. Enter - // all possible subgraphs invoked by each control flow operator. This method - // marks the maximum lifetime of each buffer so that tensors are correctly - // planned for all valid invocation flows. - TfLiteStatus MarkAllocationLifetimes( - int subgraph_idx, internal::ScratchBufferRequest* scratch_buffer_request, - ScratchBufferHandle* scratch_buffer_handles, - SubgraphAllocations* allocations); - - // Identify control flow operators and recursively mark all subgraphs which - // that operator can invoke. The lifetime of all tensors within a subgraph - // can only be extended. 
The order of subgraph invocation does not matter - // since subgraphs within the same control flow operator are executed - // within their own allocation scope (planned buffers in a subgraph cannot - // persist beyond the end of that subgraph's invocation). - TfLiteStatus MarkSubgraphLifetimesIfNecessary( - const Operator* op, - internal::ScratchBufferRequest* scratch_buffer_requests, - ScratchBufferHandle* scratch_buffer_handles, - SubgraphAllocations* allocations); - - // Returns the number of allocations. - int AllocationCount() const { return info_.allocation_info_count; } - - // Returns a pointer to the built AllocationInfo array. - AllocationInfo* Finish() const { return info_.allocation_info; } - - private: - // Mark the given Allocation info as first created at the specified allocation - // scope count. Only the first creation must be recorded since the allocation - // scope count monotonically increases throughout the lifetime marking - // process. - void UpdateFirstCreated(AllocationInfo* current, int allocation_scope_count); - - // Mark the given AllocationInfo as last used at the specified allocation - // scope - // count. Update the last used marker every time, since the allocation scope - // count monotonically increases through the lifetime marking process. - void UpdateLastUsed(AllocationInfo* current, int allocation_scope_count); - - // Validate if a subgraph satisfies assumptions. 
- TfLiteStatus ValidateSubgraph(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors); - - const tflite::Model* model_ = nullptr; - INonPersistentBufferAllocator* non_persistent_allocator_ = nullptr; -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - ErrorReporter* reporter_ = nullptr; -#endif - - GraphAllocationInfo info_; - int allocation_scope_count_ = 0; -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_arena_constants.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_arena_constants.h deleted file mode 100644 index 82828176..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_arena_constants.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_ - -namespace tflite { - -// The default buffer alignment requirement. -// We align tensor buffers to 16-byte boundaries, since this is a common -// requirement for SIMD extensions. 
-constexpr int MicroArenaBufferAlignment() { return 16; } - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_context.cc b/code/components/tflite-lib/tensorflow/lite/micro/micro_context.cc deleted file mode 100644 index 9ec694b8..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_context.cc +++ /dev/null @@ -1,129 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/micro_context.h" - -#include -#include -#include - -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { -MicroContext::MicroContext(MicroAllocator* allocator, const Model* model, - MicroGraph* graph) - : allocator_(*allocator), graph_(*graph), model_(model) {} - -MicroContext::~MicroContext() {} - -void* MicroContext::AllocatePersistentBuffer(size_t bytes) { - return allocator_.AllocatePersistentBuffer(bytes); -} - -TfLiteStatus MicroContext::RequestScratchBufferInArena(size_t bytes, - int* buffer_idx) { - return allocator_.RequestScratchBufferInArena( - bytes, graph_.GetCurrentSubgraphIndex(), buffer_idx); -} - -void* MicroContext::GetScratchBuffer(int buffer_idx) { - ScratchBufferHandle* handle = scratch_buffer_handles_ + buffer_idx; - return handle->data; -} - -TfLiteTensor* MicroContext::AllocateTempTfLiteTensor(int tensor_idx) { - return allocator_.AllocateTempTfLiteTensor(model_, graph_.GetAllocations(), - tensor_idx, - graph_.GetCurrentSubgraphIndex()); -} - -int MicroContext::GetTensorIndex(int index, int max_size, - const int* tensor_indices) { - if (index >= 0 && index < max_size) { - const int tensor_index = tensor_indices[index]; - if (tensor_index != kTfLiteOptionalTensor) { - return tensor_index; - } - } - return -1; -} - -TfLiteTensor* MicroContext::AllocateTempInputTensor(const TfLiteNode* node, - int index) { - const int tensor_index = - GetTensorIndex(index, node->inputs->size, node->inputs->data); - if (tensor_index < 0) { - return nullptr; - } - return AllocateTempTfLiteTensor(tensor_index); -} - -TfLiteTensor* MicroContext::AllocateTempOutputTensor(const TfLiteNode* node, - int index) { - const int tensor_index = - GetTensorIndex(index, node->outputs->size, node->outputs->data); - if (tensor_index < 0) { - return nullptr; - } - return AllocateTempTfLiteTensor(tensor_index); -} - -TfLiteTensor* 
MicroContext::AllocateTempIntermediateTensor( - const TfLiteNode* node, int index) { - const int tensor_index = GetTensorIndex(index, node->intermediates->size, - node->intermediates->data); - if (tensor_index < 0) { - return nullptr; - } - return AllocateTempTfLiteTensor(tensor_index); -} - -void MicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) { - return allocator_.DeallocateTempTfLiteTensor(tensor); -} - -TfLiteEvalTensor* MicroContext::GetEvalTensor(int tensor_idx) { - return &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()] - .tensors[tensor_idx]; -} - -void MicroContext::SetScratchBufferHandles( - ScratchBufferHandle* scratch_buffer_handles) { - scratch_buffer_handles_ = scratch_buffer_handles; -} - -TfLiteStatus MicroContext::set_external_context( - void* external_context_payload) { - if (external_context_payload == nullptr || - external_context_payload_ != nullptr) { - MicroPrintf( - "Attempting to set external context to %x but it was %x already", - external_context_payload, external_context_payload_); - return kTfLiteError; - } - - external_context_payload_ = external_context_payload; - return kTfLiteOk; -} - -void MicroContextReportOpError(struct TfLiteContext* context, - const char* format, ...) { - va_list args; - va_start(args, format); - GetMicroErrorReporter()->Report(format, args); - va_end(args); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_context.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_context.h deleted file mode 100644 index e7be6544..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_context.h +++ /dev/null @@ -1,161 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/micro_allocator.h" -#include "tensorflow/lite/micro/micro_graph.h" - -namespace tflite { -// MicroContext is eventually going to become the API between TFLM and the -// kernels, replacing all the functions in TfLiteContext. The end state is code -// kernels to have code like: -// -// MicroContext* micro_context = GetMicroContext(context); -// micro_context-> -class MicroContext { - public: - // Does not take any ownership, and all pointers must refer to valid objects - // that outlive the one constructed. - explicit MicroContext(MicroAllocator* allocator, const Model* model, - MicroGraph* graph); - virtual ~MicroContext(); - - // Allocate persistent buffer which has the same life time as the interpreter. - // Returns nullptr on failure. - // The memory is allocated from the tail. - // This method is only available in Init or Prepare stage. - // Virtual so that it can be faked for kernel tests. - virtual void* AllocatePersistentBuffer(size_t bytes); - - // Request a scratch buffer in the arena through static memory planning. - // This method is only available in Prepare stage and the buffer is allocated - // by the interpreter between Prepare and Eval stage. In Eval stage, - // GetScratchBuffer API can be used to fetch the address. - // Virtual so that it can be faked for kernel tests. 
- virtual TfLiteStatus RequestScratchBufferInArena(size_t bytes, - int* buffer_idx); - - // Get the scratch buffer pointer. - // This method is only available in Eval stage. - // Virtual so that it can be faked for kernel tests. - virtual void* GetScratchBuffer(int buffer_idx); - - // Returns a temporary TfLiteTensor struct for a given index. - // Virtual so that it can be faked for kernel tests. - virtual TfLiteTensor* AllocateTempTfLiteTensor(int tensor_idx); - - // Returns a temporary TfLiteTensor struct for the specified input tensor of a - // given mode. This is the recommended API over the deprecated - // GetInput/GetInputSafe to get a temp input tensor. The returned tensor shall - // be freed via calling DeallocateTempTfLiteTensor. - virtual TfLiteTensor* AllocateTempInputTensor(const TfLiteNode* node, - int index); - - // Returns a temporary TfLiteTensor struct for the specified output tensor of - // a given mode. This is the recommended API over the deprecated - // GetOutput/GetOutputSafe to get a temp output tensor. The returned tensor - // shall be freed via calling DeallocateTempTfLiteTensor. - virtual TfLiteTensor* AllocateTempOutputTensor(const TfLiteNode* node, - int index); - - // Returns a temporary TfLiteTensor struct for the specified intermediate - // tensor of a given mode. This is the recommended API over the deprecated - // GetIntermediates/GetIntermediatesSafe to get a temp intermediate tensor. - // The returned tensor shall be freed via calling DeallocateTempTfLiteTensor. - virtual TfLiteTensor* AllocateTempIntermediateTensor(const TfLiteNode* node, - int index); - - // Deallocates a temp TfLiteTensor. - // Virtual so that it can be faked for kernel tests. - virtual void DeallocateTempTfLiteTensor(TfLiteTensor* tensor); - - // Returns a TfLiteEvalTensor struct for a given index. - // Virtual so that it can be faked for kernel tests. 
- virtual TfLiteEvalTensor* GetEvalTensor(int tensor_idx); - - // Does not take ownership of the pointer and the pointer must refer to valid - // an object that outlive this class instance. - // This can only be called once to set one external context. - TfLiteStatus set_external_context(void* external_context_payload); - - void* external_context() { return external_context_payload_; } - - MicroGraph& graph() { return graph_; } - - // Sets the pointer to a list of ScratchBufferHandle instances. - // Not API between TFLM and kernels. Primarily used by the framework for - // housekeeping in MicroContext. - void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles); - - private: - // Return the tensor index as tensor_indices[index]. tensor_indices is of - // max_size. Return -1 if index is not in the valid range of tensor_indices. - int GetTensorIndex(int index, int max_size, const int* tensor_indices); - - MicroAllocator& allocator_; - MicroGraph& graph_; - const Model* model_; - - ScratchBufferHandle* scratch_buffer_handles_ = nullptr; - void* external_context_payload_ = nullptr; - - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -inline MicroContext* GetMicroContext(const struct TfLiteContext* context) { - return reinterpret_cast(context->impl_); -} - -// Deprecated API. Prefer to using the MicroContext API directly from the -// kernels. -// TODO(b/213010668): migrate all existing kernels to use MicroContext, delete -// these functions, and remove corresponding members from the TfLiteContext -// struct for TFLM. 
-inline void* MicroContextAllocatePersistentBuffer(TfLiteContext* ctx, - size_t bytes) { - return GetMicroContext(ctx)->AllocatePersistentBuffer(bytes); -} -inline TfLiteStatus MicroContextRequestScratchBufferInArena(TfLiteContext* ctx, - size_t bytes, - int* buffer_idx) { - return GetMicroContext(ctx)->RequestScratchBufferInArena(bytes, buffer_idx); -} -inline void* MicroContextGetScratchBuffer(TfLiteContext* ctx, int buffer_idx) { - return GetMicroContext(ctx)->GetScratchBuffer(buffer_idx); -} -inline TfLiteTensor* MicroContextGetTensor(const struct TfLiteContext* context, - int tensor_idx) { - return GetMicroContext(context)->AllocateTempTfLiteTensor(tensor_idx); -} -inline TfLiteEvalTensor* MicroContextGetEvalTensor( - const struct TfLiteContext* context, int tensor_idx) { - return GetMicroContext(context)->GetEvalTensor(tensor_idx); -} -inline TfLiteExternalContext* MicroContextGetExternalContext( - TfLiteContext* context, TfLiteExternalContextType unused) { - return reinterpret_cast( - GetMicroContext(context)->external_context()); -} - -// Requests that an error be reported with format string msg. -void MicroContextReportOpError(struct TfLiteContext* context, - const char* format, ...); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_error_reporter.cc b/code/components/tflite-lib/tensorflow/lite/micro/micro_error_reporter.cc deleted file mode 100644 index 5aba058d..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_error_reporter.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/micro_error_reporter.h" - -#include -#include -#include - -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) -#include "tensorflow/lite/micro/debug_log.h" -#include "tensorflow/lite/micro/micro_string.h" -#endif - -namespace { -uint8_t micro_error_reporter_buffer[sizeof(tflite::MicroErrorReporter)]; -tflite::MicroErrorReporter* error_reporter_ = nullptr; - -void Log(const char* format, va_list args) { -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - // Only pulling in the implementation of this function for builds where we - // expect to make use of it to be extra cautious about not increasing the code - // size. - static constexpr int kMaxLogLen = 256; - char log_buffer[kMaxLogLen]; - MicroVsnprintf(log_buffer, kMaxLogLen, format, args); - DebugLog(log_buffer); - DebugLog("\r\n"); -#endif -} - -} // namespace - -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) -void MicroPrintf(const char* format, ...) 
{ - va_list args; - va_start(args, format); - Log(format, args); - va_end(args); -} -#endif - -namespace tflite { -ErrorReporter* GetMicroErrorReporter() { - if (error_reporter_ == nullptr) { - error_reporter_ = new (micro_error_reporter_buffer) MicroErrorReporter(); - } - return error_reporter_; -} - -int MicroErrorReporter::Report(const char* format, va_list args) { - Log(format, args); - return 0; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_error_reporter.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_error_reporter.h deleted file mode 100644 index 0e3b0c38..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_error_reporter.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_ - -#include - -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/micro/compatibility.h" - -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) -// This function can be used independent of the MicroErrorReporter to get -// printf-like functionalitys and are common to all target platforms. 
-void MicroPrintf(const char* format, ...); -#else -// We use a #define to ensure that the strings are completely stripped, to -// prevent an unnecessary increase in the binary size. -#define MicroPrintf(...) tflite::Unused(__VA_ARGS__) -#endif - -namespace tflite { - -// From -// https://stackoverflow.com/questions/23235910/variadic-unused-function-macro -template -void Unused(Args&&... args) { - (void)(sizeof...(args)); -} - -// Get a pointer to a singleton global error reporter. -ErrorReporter* GetMicroErrorReporter(); - -class MicroErrorReporter : public ErrorReporter { - public: - ~MicroErrorReporter() override {} - int Report(const char* format, va_list args) override; - - private: - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_graph.cc b/code/components/tflite-lib/tensorflow/lite/micro/micro_graph.cc deleted file mode 100644 index d9b2176e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_graph.cc +++ /dev/null @@ -1,248 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/micro_graph.h" - -#include "flatbuffers/flatbuffers.h" // from @flatbuffers -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/flatbuffer_utils.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_profiler.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { -namespace { - -const char* OpNameFromRegistration(const TfLiteRegistration* registration) { - if (registration->builtin_code == BuiltinOperator_CUSTOM) { - return registration->custom_name; - } else { - return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code)); - } -} - -} // namespace - -MicroGraph::MicroGraph(TfLiteContext* context, const Model* model, - MicroAllocator* allocator, - MicroResourceVariables* resource_variables) - : context_(context), - model_(model), - allocator_(allocator), - current_subgraph_index_(0), - resource_variables_(resource_variables) { - if (model != nullptr) { - subgraphs_ = model->subgraphs(); - } -} - -MicroGraph::~MicroGraph() {} - -TfLiteStatus MicroGraph::InitSubgraphs() { - int previous_subgraph_idx = current_subgraph_index_; - - for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size(); - subgraph_idx++) { - current_subgraph_index_ = subgraph_idx; - uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx); - for (size_t i = 0; i < operators_size; ++i) { - TfLiteNode* node = - &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); - const TfLiteRegistration* registration = - subgraph_allocations_[subgraph_idx] - .node_and_registrations[i] - .registration; - size_t init_data_size; - const char* init_data; - if (registration->builtin_code == BuiltinOperator_CUSTOM) { - init_data = 
reinterpret_cast(node->custom_initial_data); - init_data_size = node->custom_initial_data_size; - } else { - init_data = reinterpret_cast(node->builtin_data); - init_data_size = 0; - } - if (registration->init) { - node->user_data = - registration->init(context_, init_data, init_data_size); - } - } - } - current_subgraph_index_ = previous_subgraph_idx; - - return kTfLiteOk; -} - -TfLiteStatus MicroGraph::PrepareSubgraphs() { - int previous_subgraph_idx = current_subgraph_index_; - - for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size(); - subgraph_idx++) { - current_subgraph_index_ = subgraph_idx; - uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx); - for (size_t i = 0; i < operators_size; ++i) { - TfLiteNode* node = - &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); - const TfLiteRegistration* registration = - subgraph_allocations_[subgraph_idx] - .node_and_registrations[i] - .registration; - if (registration->prepare != nullptr) { - TfLiteStatus prepare_status = registration->prepare(context_, node); - if (prepare_status != kTfLiteOk) { - MicroPrintf("Node %s (number %df) failed to prepare with status %d", - OpNameFromRegistration(registration), i, prepare_status); - return kTfLiteError; - } - } - allocator_->FinishPrepareNodeAllocations(/*node_id=*/i); - } - } - current_subgraph_index_ = previous_subgraph_idx; - - return kTfLiteOk; -} - -TfLiteStatus MicroGraph::FreeSubgraphs() { - int previous_subgraph_idx = current_subgraph_index_; - - for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size(); - subgraph_idx++) { - current_subgraph_index_ = subgraph_idx; - uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx); - for (size_t i = 0; i < operators_size; ++i) { - TfLiteNode* node = - &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); - const TfLiteRegistration* registration = - subgraph_allocations_[subgraph_idx] - .node_and_registrations[i] - .registration; - // 
registration is allocated outside the interpreter, so double check to - // make sure it's not nullptr; - if (registration != nullptr && registration->free != nullptr) { - registration->free(context_, node->user_data); - } - } - } - current_subgraph_index_ = previous_subgraph_idx; - - return kTfLiteOk; -} - -TfLiteStatus MicroGraph::InvokeSubgraph(int subgraph_idx) { - int previous_subgraph_idx = current_subgraph_index_; - current_subgraph_index_ = subgraph_idx; - - if (static_cast(subgraph_idx) >= subgraphs_->size()) { - MicroPrintf("Accessing subgraph %d but only %d subgraphs found", - subgraph_idx, subgraphs_->size()); - return kTfLiteError; - } - uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx); - for (size_t i = 0; i < operators_size; ++i) { - TfLiteNode* node = - &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node); - const TfLiteRegistration* registration = subgraph_allocations_[subgraph_idx] - .node_and_registrations[i] - .registration; - -// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with -// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is -// only defined for builds with the error strings. -#if !defined(TF_LITE_STRIP_ERROR_STRINGS) - ScopedMicroProfiler scoped_profiler( - OpNameFromRegistration(registration), - reinterpret_cast(context_->profiler)); -#endif - - TFLITE_DCHECK(registration->invoke); - TfLiteStatus invoke_status = registration->invoke(context_, node); - - // All TfLiteTensor structs used in the kernel are allocated from temp - // memory in the allocator. This creates a chain of allocations in the - // temp section. The call below resets the chain of allocations to - // prepare for the next call. 
- allocator_->ResetTempAllocations(); - - if (invoke_status == kTfLiteError) { - MicroPrintf("Node %s (number %d) failed to invoke with status %d", - OpNameFromRegistration(registration), i, invoke_status); - return kTfLiteError; - } else if (invoke_status != kTfLiteOk) { - return invoke_status; - } - } - current_subgraph_index_ = previous_subgraph_idx; - return kTfLiteOk; -} - -TfLiteStatus MicroGraph::ResetVariableTensors() { - for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size(); - subgraph_idx++) { - const SubGraph* subgraph = (*subgraphs_)[subgraph_idx]; - for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { - auto* tensor = subgraph->tensors()->Get(i); - if (tensor->is_variable()) { - size_t buffer_size; - TF_LITE_ENSURE_STATUS(TfLiteEvalTensorByteLength( - &subgraph_allocations_[subgraph_idx].tensors[i], &buffer_size)); - - int value = 0; - if (tensor->type() == tflite::TensorType_INT8) { - value = tensor->quantization()->zero_point()->Get(0); - } - memset(subgraph_allocations_[subgraph_idx].tensors[i].data.raw, value, - buffer_size); - } - } - } - if (resource_variables_ != nullptr) { - resource_variables_->ResetAll(); - } - - return kTfLiteOk; -} - -int MicroGraph::NumSubgraphs() { return model_->subgraphs()->size(); } - -void MicroGraph::SetSubgraphAllocations( - SubgraphAllocations* subgraph_allocations) { - subgraph_allocations_ = subgraph_allocations; -} - -size_t MicroGraph::NumSubgraphInputs(int subgraph_idx) { - return model_->subgraphs()->Get(subgraph_idx)->inputs()->size(); -} - -TfLiteEvalTensor* MicroGraph::GetSubgraphInput(int subgraph_idx, - int input_idx) { - int tensor_idx = - model_->subgraphs()->Get(subgraph_idx)->inputs()->Get(input_idx); - return &subgraph_allocations_[subgraph_idx].tensors[tensor_idx]; -} - -size_t MicroGraph::NumSubgraphOutputs(int subgraph_idx) { - return model_->subgraphs()->Get(subgraph_idx)->outputs()->size(); -} - -TfLiteEvalTensor* MicroGraph::GetSubgraphOutput(int subgraph_idx, - int output_idx) 
{ - int tensor_idx = - model_->subgraphs()->Get(subgraph_idx)->outputs()->Get(output_idx); - return &subgraph_allocations_[subgraph_idx].tensors[tensor_idx]; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_graph.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_graph.h deleted file mode 100644 index 942082ac..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_graph.h +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/micro_allocator.h" -#include "tensorflow/lite/micro/micro_resource_variable.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -// Abstracts the details of interacting with the tflite::Model. -// -// Provides methods to access, initialize, prepare, invoke and free any -// subgraph in the tflite::Graph. -class MicroGraph { - public: - // The lifetime of the context, model, allocator and resource_variables must - // be at least as long as that of the graph object, since the this class may - // need to access them at any time. If resource_variables is a nullptr, - // GetResourceVariables will return a nullptr. 
- MicroGraph(TfLiteContext* context, const Model* model, - MicroAllocator* allocator, - MicroResourceVariables* resource_variables); - virtual ~MicroGraph(); - - // Sets up builtin data and calls TfLiteRegistration->Init for every operator - // in every subgraph in the model. - virtual TfLiteStatus InitSubgraphs(); - - // Calls TfLiteRegistration->Prepare for every operator in every subgraph in - // the model. - virtual TfLiteStatus PrepareSubgraphs(); - - // Calls TfLiteRegistration->Free for every operator in every subgraph in the - // model. - virtual TfLiteStatus FreeSubgraphs(); - - // Calls TfLiteRegistration->Invoke for every operator in a single subgraph in - // the model. - virtual TfLiteStatus InvokeSubgraph(int subgraph_idx); - - // Zeros out all variable tensors in all subgraphs in the model. - virtual TfLiteStatus ResetVariableTensors(); - - // Number of tensor inputs to a specified subgraph in the model. - virtual size_t NumSubgraphInputs(int subgraph_idx); - - // Get the specified input tensor of a specified subgraph in the model. - virtual TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int input_idx); - - // Number of tensor outputs from a specified subgraph in the model. - virtual size_t NumSubgraphOutputs(int subgraph_idx); - - // Get the specified output tensor of a specified subgraph in the model. - virtual TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx, int output_idx); - - // Number of subgraphs in the model. - virtual int NumSubgraphs(); - - // Hook to pass in subgraph allocations tracked within the interpreter, - // allowing MicroGraph to init / prepare / invoke subgraphs in the model. - void SetSubgraphAllocations(SubgraphAllocations* subgraph_allocations); - - // Get the current subgraph index. Within an on operator, this is guaranteed - // to be the subgraph of that operator. - int GetCurrentSubgraphIndex() { return current_subgraph_index_; } - - // Gets the list of alloctions for each subgraph. 
This is the source of truth - // for all per-subgraph allocation data. - SubgraphAllocations* GetAllocations() { return subgraph_allocations_; } - - // Get the resource variables for this TFLM graph. - MicroResourceVariables* GetResourceVariables() { return resource_variables_; } - - private: - TfLiteContext* context_; - const Model* model_; - MicroAllocator* allocator_; - SubgraphAllocations* subgraph_allocations_ = nullptr; - int current_subgraph_index_; - MicroResourceVariables* resource_variables_; - const flatbuffers::Vector>* subgraphs_; - - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_op_resolver.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_op_resolver.h deleted file mode 100644 index 757b6b89..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_op_resolver.h +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/core/api/flatbuffer_conversions.h" -#include "tensorflow/lite/core/api/op_resolver.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -// This is an interface for the OpResolver for TFLiteMicro. The differences from -// the TFLite OpResolver base class are to: -// * explicitly remove support for Op versions -// * allow for finer grained registration of the Builtin Ops to reduce code -// size for TFLiteMicro. -// -// We need an interface class instead of directly using MicroMutableOpResolver -// because MicroMutableOpResolver is a class template with the number of -// registered Ops as the template parameter. -class MicroOpResolver : public OpResolver { - public: - typedef TfLiteStatus (*BuiltinParseFunction)(const Operator* op, - ErrorReporter* error_reporter, - BuiltinDataAllocator* allocator, - void** builtin_data); - - // Returns the Op registration struct corresponding to the enum code from the - // flatbuffer schema. Returns nullptr if the op is not found or if op == - // BuiltinOperator_CUSTOM. - virtual const TfLiteRegistration* FindOp(BuiltinOperator op) const = 0; - - // Returns the Op registration struct corresponding to the custom operator by - // name. - virtual const TfLiteRegistration* FindOp(const char* op) const = 0; - - // This implementation exists for compatibility with the OpResolver base class - // and disregards the version parameter. - const TfLiteRegistration* FindOp(BuiltinOperator op, - int version) const final { - return FindOp(op); - } - - // This implementation exists for compatibility with the OpResolver base class - // and disregards the version parameter. 
- const TfLiteRegistration* FindOp(const char* op, int version) const final { - return FindOp(op); - } - - // Returns the operator specific parsing function for the OpData for a - // BuiltinOperator (if registered), else nullptr. - virtual BuiltinParseFunction GetOpDataParser(BuiltinOperator op) const = 0; - - ~MicroOpResolver() override {} -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_resource_variable.cc b/code/components/tflite-lib/tensorflow/lite/micro/micro_resource_variable.cc deleted file mode 100644 index c4577773..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_resource_variable.cc +++ /dev/null @@ -1,148 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/micro_resource_variable.h" - -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" -#include "tensorflow/lite/micro/micro_utils.h" - -namespace tflite { - -namespace {} // namespace - -MicroResourceVariables* MicroResourceVariables::Create( - MicroAllocator* allocator, int max_num_variables) { - TFLITE_DCHECK(allocator != nullptr); - - uint8_t* allocator_buffer = static_cast( - allocator->AllocatePersistentBuffer(sizeof(MicroResourceVariables))); - MicroResourceVariable* variable_array = - static_cast(allocator->AllocatePersistentBuffer( - sizeof(MicroResourceVariable) * max_num_variables)); - MicroResourceVariables* variables = new (allocator_buffer) - MicroResourceVariables(variable_array, max_num_variables); - return variables; -} - -int MicroResourceVariables::CreateIdIfNoneFound(const char* container, - const char* shared_name) { - int resource_id = FindId(container, shared_name); - if (resource_id >= 0) { - return resource_id; - } - - // no existing variable found for the given container and shared name pair. - if (num_resource_variables_ >= max_variable_count_) { - MicroPrintf( - "Failed to allocate resource variable. 
Maximum resource variable count " - "(%d) " - "reached.", - max_variable_count_); - return -1; - } - - resource_id = num_resource_variables_++; - resource_variables_[resource_id].container = container; - resource_variables_[resource_id].shared_name = shared_name; - resource_variables_[resource_id].resource_buffer = nullptr; - resource_variables_[resource_id].bytes = 0; - return resource_id; -} - -TfLiteStatus MicroResourceVariables::Read(int id, - const TfLiteEvalTensor* tensor) { - if (id < 0 || id >= num_resource_variables_) { - MicroPrintf("Attempting to read non-existent resource variable %d", id); - return kTfLiteError; - } - MicroResourceVariable variable = resource_variables_[id]; - TFLITE_DCHECK(EvalTensorBytes(tensor) == variable.bytes); - TFLITE_DCHECK(variable.resource_buffer != nullptr); - memcpy(tensor->data.raw, variable.resource_buffer, variable.bytes); - return kTfLiteOk; -} - -TfLiteStatus MicroResourceVariables::Allocate(int id, TfLiteContext* context, - const TfLiteTensor* tensor) { - if (id < 0 || id >= num_resource_variables_) { - MicroPrintf("Attempting to read non-existent resource variable %d", id); - return kTfLiteError; - } - - MicroResourceVariable& variable = resource_variables_[id]; - - if (variable.resource_buffer == nullptr) { - variable.bytes = tensor->bytes; - variable.resource_buffer = - context->AllocatePersistentBuffer(context, tensor->bytes); - if (variable.resource_buffer == nullptr) { - MicroPrintf("Failed to allocate resource buffer."); - return kTfLiteError; - } - // Zero out resource buffers by deafult. Buffers can be initialized to - // nonzero values using ASSIGN_VARIABLE. 
- memset(variable.resource_buffer, 0, variable.bytes); - } - - return kTfLiteOk; -} - -TfLiteStatus MicroResourceVariables::Assign(int id, - const TfLiteEvalTensor* tensor) { - if (id < 0 || id >= num_resource_variables_) { - MicroPrintf("Attempting to read non-existent resource variable %d", id); - return kTfLiteError; - } - MicroResourceVariable variable = resource_variables_[id]; - - if (variable.resource_buffer == nullptr) { - MicroPrintf( - "Attempting to assign from a TfLiteEvalTensor before the resource " - "buffer has been allocated. Make sure to call AssignResourceVariable " - "with a TfLiteTensor first."); - return kTfLiteError; - } - TFLITE_DCHECK(EvalTensorBytes(tensor) == variable.bytes); - memcpy(variable.resource_buffer, tensor->data.raw, variable.bytes); - return kTfLiteOk; -} - -TfLiteStatus MicroResourceVariables::ResetAll() { - for (int i = 0; i < num_resource_variables_; i++) { - MicroResourceVariable variable = resource_variables_[i]; - memset(variable.resource_buffer, 0, variable.bytes); - } - return kTfLiteOk; -} - -int MicroResourceVariables::FindId(const char* container, - const char* shared_name) { - for (int i = 0; i < num_resource_variables_; i++) { - // Some TFLite flatbuffers contain null container names to save space. - if ((container == nullptr || - !strcmp(container, resource_variables_[i].container)) && - !strcmp(shared_name, resource_variables_[i].shared_name)) { - return i; - } - } - return -1; -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_resource_variable.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_resource_variable.h deleted file mode 100644 index e8df991c..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_resource_variable.h +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_ -#define TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_ - -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/micro_allocator.h" - -namespace tflite { - -class MicroResourceVariables { - public: - // Create - static MicroResourceVariables* Create(MicroAllocator* allocator, - int num_variables); - - // Creates a resource variable if none is available for the given container - // and shared name pair. Returns the resource ID corresponding to the - // container and shared name pair. If allocation fails, the returned resource - // ID will be negative. The the container and shared_name must outlive this - // class. - int CreateIdIfNoneFound(const char* container, const char* shared_name); - - // Read the resource buffer associated with the given ID into the given - // tensor. - TfLiteStatus Read(int id, const TfLiteEvalTensor* tensor); - - // Allocates the resource buffer if none has been allocated, based on the - // length of the input tensor. Copies input tensor contents to the resource - // buffer. - TfLiteStatus Allocate(int id, TfLiteContext* context, - const TfLiteTensor* tensor); - - // Copies input tensor contents to the resource buffer. 
- // AllocateResourceVariable with a TFLite tensor must have been called first - // in order to allocate the resource buffer. - TfLiteStatus Assign(int id, const TfLiteEvalTensor* tensor); - - // Zeros out all resource buffers. - TfLiteStatus ResetAll(); - - private: - int FindId(const char* container, const char* shared_name); - - // Micro resource contains the mapping between resource container/name strings - // and resouce IDs. Each resource ID corresponds to a resource buffer pointer. - // The resouce ID is created during the VAR_HANDLE operator preparation stage. - // The resource buffer pointer is created during ASSIGN_VARIABLE preparation - // stage based on the size of the TFLiteTensor being assigned. - struct MicroResourceVariable { - const char* container; - const char* shared_name; - void* resource_buffer; - - // This is only for verifying read size. - size_t bytes; - }; - - MicroResourceVariables(MicroResourceVariable* variables, - int max_variable_count) - : resource_variables_(variables), - max_variable_count_(max_variable_count), - num_resource_variables_(0) {} - - MicroResourceVariable* resource_variables_; - int max_variable_count_; - int num_resource_variables_; -}; - -} // namespace tflite - -#endif // TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_string.cc b/code/components/tflite-lib/tensorflow/lite/micro/micro_string.cc deleted file mode 100644 index bb41a9e3..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_string.cc +++ /dev/null @@ -1,317 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Implements debug logging for numbers by converting them into strings and then -// calling the main DebugLog(char*) function. These are separated into a -// different file so that platforms can just implement the string output version -// of DebugLog() and then get the numerical variations without requiring any -// more code. - -#include "tensorflow/lite/micro/micro_string.h" - -#include -#include -#include - -namespace { - -// Int formats can need up to 10 bytes for the value plus a single byte for the -// sign. -constexpr int kMaxIntCharsNeeded = 10 + 1; -// Hex formats can need up to 8 bytes for the value plus two bytes for the "0x". -constexpr int kMaxHexCharsNeeded = 8 + 2; - -// Float formats can need up to 7 bytes for the fraction plus 3 bytes for "x2^" -// plus 3 bytes for the exponent and a single sign bit. -constexpr float kMaxFloatCharsNeeded = 7 + 3 + 3 + 1; - -// All input buffers to the number conversion functions must be this long. -const int kFastToBufferSize = 48; - -// Reverses a zero-terminated string in-place. -char* ReverseStringInPlace(char* start, char* end) { - char* p1 = start; - char* p2 = end - 1; - while (p1 < p2) { - char tmp = *p1; - *p1++ = *p2; - *p2-- = tmp; - } - return start; -} - -// Appends a string to a string, in-place. You need to pass in the maximum -// string length as the second argument. 
-char* StrCatStr(char* main, int main_max_length, const char* to_append) { - char* current = main; - while (*current != 0) { - ++current; - } - char* current_end = main + (main_max_length - 1); - while ((*to_append != 0) && (current < current_end)) { - *current = *to_append; - ++current; - ++to_append; - } - *current = 0; - return current; -} - -// Populates the provided buffer with an ASCII representation of the number. -char* FastUInt32ToBufferLeft(uint32_t i, char* buffer, int base) { - char* start = buffer; - do { - int32_t digit = i % base; - char character; - if (digit < 10) { - character = '0' + digit; - } else { - character = 'a' + (digit - 10); - } - *buffer++ = character; - i /= base; - } while (i > 0); - *buffer = 0; - ReverseStringInPlace(start, buffer); - return buffer; -} - -// Populates the provided buffer with an ASCII representation of the number. -char* FastInt32ToBufferLeft(int32_t i, char* buffer) { - uint32_t u = i; - if (i < 0) { - *buffer++ = '-'; - u = -u; - } - return FastUInt32ToBufferLeft(u, buffer, 10); -} - -// Converts a number to a string and appends it to another. -char* StrCatInt32(char* main, int main_max_length, int32_t number) { - char number_string[kFastToBufferSize]; - FastInt32ToBufferLeft(number, number_string); - return StrCatStr(main, main_max_length, number_string); -} - -// Converts a number to a string and appends it to another. -char* StrCatUInt32(char* main, int main_max_length, uint32_t number, int base) { - char number_string[kFastToBufferSize]; - FastUInt32ToBufferLeft(number, number_string, base); - return StrCatStr(main, main_max_length, number_string); -} - -// Populates the provided buffer with ASCII representation of the float number. -// Avoids the use of any floating point instructions (since these aren't -// supported on many microcontrollers) and as a consequence prints values with -// power-of-two exponents. 
-char* FastFloatToBufferLeft(float f, char* buffer) { - char* current = buffer; - char* current_end = buffer + (kFastToBufferSize - 1); - // Access the bit fields of the floating point value to avoid requiring any - // float instructions. These constants are derived from IEEE 754. - const uint32_t sign_mask = 0x80000000; - const uint32_t exponent_mask = 0x7f800000; - const int32_t exponent_shift = 23; - const int32_t exponent_bias = 127; - const uint32_t fraction_mask = 0x007fffff; - uint32_t u; - memcpy(&u, &f, sizeof(int32_t)); - const int32_t exponent = - ((u & exponent_mask) >> exponent_shift) - exponent_bias; - const uint32_t fraction = (u & fraction_mask); - // Expect ~0x2B1B9D3 for fraction. - if (u & sign_mask) { - *current = '-'; - current += 1; - } - *current = 0; - // These are special cases for infinities and not-a-numbers. - if (exponent == 128) { - if (fraction == 0) { - current = StrCatStr(current, (current_end - current), "Inf"); - return current; - } else { - current = StrCatStr(current, (current_end - current), "NaN"); - return current; - } - } - // 0x007fffff (8388607) represents 0.99... for the fraction, so to print the - // correct decimal digits we need to scale our value before passing it to the - // conversion function. This scale should be 10000000/8388608 = 1.1920928955. - // We can approximate this using multiply-adds and right-shifts using the - // values in this array. The 1. portion of the number string is printed out - // in a fixed way before the fraction, below. - const int32_t scale_shifts_size = 13; - const int8_t scale_shifts[13] = {3, 4, 8, 11, 13, 14, 17, - 18, 19, 20, 21, 22, 23}; - uint32_t scaled_fraction = fraction; - for (int i = 0; i < scale_shifts_size; ++i) { - scaled_fraction += (fraction >> scale_shifts[i]); - } - *current = '1'; - current += 1; - *current = '.'; - current += 1; - *current = 0; - - // Prepend leading zeros to fill in all 7 bytes of the fraction. Truncate - // zeros off the end of the fraction. 
Every fractional value takes 7 bytes. - // For example, 2500 would be written into the buffer as 0002500 since it - // represents .00025. - constexpr int kMaxFractionalDigits = 7; - - // Abort early if there is not enough space in the buffer. - if (current_end - current <= kMaxFractionalDigits) { - return current; - } - - // Pre-fill buffer with zeros to ensure zero-truncation works properly. - for (int i = 1; i < kMaxFractionalDigits; i++) { - *(current + i) = '0'; - } - - // Track how large the fraction is to add leading zeros. - char* previous = current; - current = StrCatUInt32(current, (current_end - current), scaled_fraction, 10); - int fraction_digits = current - previous; - int leading_zeros = kMaxFractionalDigits - fraction_digits; - - // Overwrite the null terminator from StrCatUInt32 to ensure zero-trunctaion - // works properly. - *current = '0'; - - // Shift fraction values and prepend zeros if necessary. - if (leading_zeros != 0) { - for (int i = 0; i < fraction_digits; i++) { - current--; - *(current + leading_zeros) = *current; - *current = '0'; - } - current += kMaxFractionalDigits; - } - - // Truncate trailing zeros for cleaner logs. Ensure we leave at least one - // fractional character for the case when scaled_fraction is 0. 
- while (*(current - 1) == '0' && (current - 1) > previous) { - current--; - } - *current = 0; - current = StrCatStr(current, (current_end - current), "*2^"); - current = StrCatInt32(current, (current_end - current), exponent); - return current; -} - -int FormatInt32(char* output, int32_t i) { - return static_cast(FastInt32ToBufferLeft(i, output) - output); -} - -int FormatUInt32(char* output, uint32_t i) { - return static_cast(FastUInt32ToBufferLeft(i, output, 10) - output); -} - -int FormatHex(char* output, uint32_t i) { - return static_cast(FastUInt32ToBufferLeft(i, output, 16) - output); -} - -int FormatFloat(char* output, float i) { - return static_cast(FastFloatToBufferLeft(i, output) - output); -} - -} // namespace - -extern "C" int MicroVsnprintf(char* output, int len, const char* format, - va_list args) { - int output_index = 0; - const char* current = format; - // One extra character must be left for the null terminator. - const int usable_length = len - 1; - while (*current != '\0' && output_index < usable_length) { - if (*current == '%') { - current++; - switch (*current) { - case 'd': - // Cut off log message if format could exceed log buffer length. 
- if (usable_length - output_index < kMaxIntCharsNeeded) { - output[output_index++] = '\0'; - return output_index; - } - output_index += - FormatInt32(&output[output_index], va_arg(args, int32_t)); - current++; - break; - case 'u': - if (usable_length - output_index < kMaxIntCharsNeeded) { - output[output_index++] = '\0'; - return output_index; - } - output_index += - FormatUInt32(&output[output_index], va_arg(args, uint32_t)); - current++; - break; - case 'x': - if (usable_length - output_index < kMaxHexCharsNeeded) { - output[output_index++] = '\0'; - return output_index; - } - output[output_index++] = '0'; - output[output_index++] = 'x'; - output_index += - FormatHex(&output[output_index], va_arg(args, uint32_t)); - current++; - break; - case 'f': - if (usable_length - output_index < kMaxFloatCharsNeeded) { - output[output_index++] = '\0'; - return output_index; - } - output_index += - FormatFloat(&output[output_index], va_arg(args, double)); - current++; - break; - case '%': - output[output_index++] = *current++; - break; - case 'c': - if (usable_length - output_index < 1) { - output[output_index++] = '\0'; - return output_index; - } - output[output_index++] = va_arg(args, int32_t); - current++; - break; - case 's': - char* string = va_arg(args, char*); - int string_idx = 0; - while (string_idx + output_index < usable_length && - string[string_idx] != '\0') { - output[output_index++] = string[string_idx++]; - } - current++; - } - } else { - output[output_index++] = *current++; - } - } - output[output_index++] = '\0'; - return output_index; -} - -extern "C" int MicroSnprintf(char* output, int len, const char* format, ...) 
{ - va_list args; - va_start(args, format); - int bytes_written = MicroVsnprintf(output, len, format, args); - va_end(args); - return bytes_written; -} diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_string.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_string.h deleted file mode 100644 index 59303e82..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_string.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_MICRO_STRING_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_STRING_H_ - -#include - -// Implements simple string formatting for numeric types. Returns the number of -// bytes written to output. -extern "C" { -// Functionally equivalent to vsnprintf, trimmed down for TFLite Micro. -// MicroSnprintf() is implemented using MicroVsnprintf(). -int MicroVsnprintf(char* output, int len, const char* format, va_list args); -// Functionally equavalent to snprintf, trimmed down for TFLite Micro. -// For example, MicroSnprintf(buffer, 10, "int %d", 10) will put the string -// "int 10" in the buffer. -// Floating point values are logged in exponent notation (1.XXX*2^N). 
-int MicroSnprintf(char* output, int len, const char* format, ...); -} - -#endif // TENSORFLOW_LITE_MICRO_MICRO_STRING_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_time.cc b/code/components/tflite-lib/tensorflow/lite/micro/micro_time.cc deleted file mode 100644 index 2d74fdba..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_time.cc +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Reference implementation of timer functions. Platforms are not required to -// implement these timer methods, but they are required to enable profiling. - -// On platforms that have a POSIX stack or C library, it can be written using -// methods from or clock() from . - -// To add an equivalent function for your own platform, create your own -// implementation file, and place it in a subfolder with named after the OS -// you're targeting. For example, see the Cortex M bare metal version in -// tensorflow/lite/micro/bluepill/micro_time.cc - -#include "tensorflow/lite/micro/micro_time.h" - -#if defined(TF_LITE_USE_CTIME) -#include -#endif - -namespace tflite { - -#if !defined(TF_LITE_USE_CTIME) - -// Reference implementation of the ticks_per_second() function that's required -// for a platform to support Tensorflow Lite for Microcontrollers profiling. 
-// This returns 0 by default because timing is an optional feature that builds -// without errors on platforms that do not need it. -uint32_t ticks_per_second() { return 0; } - -// Reference implementation of the GetCurrentTimeTicks() function that's -// required for a platform to support Tensorflow Lite for Microcontrollers -// profiling. This returns 0 by default because timing is an optional feature -// that builds without errors on platforms that do not need it. -uint32_t GetCurrentTimeTicks() { return 0; } - -#else // defined(TF_LITE_USE_CTIME) - -// For platforms that support ctime, we implment the micro_time interface in -// this central location. -uint32_t ticks_per_second() { return CLOCKS_PER_SEC; } - -uint32_t GetCurrentTimeTicks() { return clock(); } -#endif - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_time.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_time.h deleted file mode 100644 index 7a8ab455..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_time.h +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ - -#include - -namespace tflite { - -// These functions should be implemented by each target platform, and provide an -// accurate tick count along with how many ticks there are per second. -uint32_t ticks_per_second(); - -// Return time in ticks. The meaning of a tick varies per platform. -uint32_t GetCurrentTimeTicks(); - -inline uint32_t TicksToMs(int32_t ticks) { - return static_cast(1000.0f * static_cast(ticks) / - static_cast(ticks_per_second())); -} - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MICRO_TIME_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_utils.cc b/code/components/tflite-lib/tensorflow/lite/micro/micro_utils.cc deleted file mode 100644 index 97b83695..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_utils.cc +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/micro_utils.h" - -#include -#include -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/op_macros.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_error_reporter.h" - -namespace tflite { - -int ElementCount(const TfLiteIntArray& dims) { - int result = 1; - for (int i = 0; i < dims.size; ++i) { - result *= dims.data[i]; - } - return result; -} - -size_t EvalTensorBytes(const TfLiteEvalTensor* tensor) { - size_t bytes_per_element; - TFLITE_DCHECK(kTfLiteOk == - TfLiteTypeSizeOf(tensor->type, &bytes_per_element)); - return ElementCount(*tensor->dims) * bytes_per_element; -} - -void SignedSymmetricPerChannelQuantize(const float* values, - TfLiteIntArray* dims, - int quantized_dimension, - int8_t* quantized_values, - float* scaling_factors) { - int input_size = ElementCount(*dims); - int channel_count = dims->data[quantized_dimension]; - int per_channel_size = input_size / channel_count; - - int stride; - int channel_stride; - if (quantized_dimension == 0) { - stride = 1; - channel_stride = per_channel_size; - } else if (quantized_dimension == 3) { - stride = channel_count; - channel_stride = 1; - } else { - MicroPrintf("quantized dimension must be 0 or 3"); - TFLITE_ABORT; - } - - // Calculate scales for each channel. 
- for (int channel = 0; channel < channel_count; channel++) { - float min = 0; - float max = 0; - - for (int i = 0; i < per_channel_size; i++) { - int idx = channel * channel_stride + i * stride; - min = fminf(min, values[idx]); - max = fmaxf(max, values[idx]); - } - scaling_factors[channel] = - fmaxf(fabs(min), fabs(max)) / std::numeric_limits::max(); - for (int i = 0; i < per_channel_size; i++) { - int idx = channel * channel_stride + i * stride; - const int32_t quantized_value = - static_cast(roundf(values[idx] / scaling_factors[channel])); - // Clamp: just in case some odd numeric offset. - quantized_values[idx] = - fminf(std::numeric_limits::max(), - fmaxf(std::numeric_limits::min() + 1, quantized_value)); - } - } -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/micro_utils.h b/code/components/tflite-lib/tensorflow/lite/micro/micro_utils.h deleted file mode 100644 index 84d5c437..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/micro_utils.h +++ /dev/null @@ -1,143 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_ -#define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_ - -#include -#include -#include -#include - -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -// Returns number of elements in the shape array. 
- -int ElementCount(const TfLiteIntArray& dims); - -size_t EvalTensorBytes(const TfLiteEvalTensor* tensor); - -// C++11 does not support constexpr max; hence, use ternary conditional to -// create our own constexpr Max function. -constexpr int Max(int a, int b) { return a >= b ? a : b; } - -// Converts a float value into a quantized value. Note that large values (close -// to max int and min int) may see significant error due to a lack of floating -// point granularity for large values. -template -T FloatToQuantizedType(const float value, const float scale, int zero_point) { - int32_t result = round(value / scale) + zero_point; - result = - std::max(static_cast(std::numeric_limits::min()), result); - result = - std::min(static_cast(std::numeric_limits::max()), result); - return result; -} - -template -T FloatToSymmetricQuantizedType(const float value, const float scale) { - // 64-bit values are required since 8x16 conv accumulates to int64, meaning - // an int64 bias is required. - std::int64_t result = round(value / scale); - result = std::max( - static_cast(std::numeric_limits::min() + 1), result); - result = std::min(static_cast(std::numeric_limits::max()), - result); - return result; -} - -// Helper methods to quantize arrays of floats to the desired format. 
-// -// There are several key flavors of quantization in TfLite: -// asymmetric symmetric per channel -// int8_t | X | X | X | -// uint8_t | X | X | | -// int16_t | X | | | -// int32_t | | X | X | -// -// The per-op quantization spec can be found here: -// https://www.tensorflow.org/lite/performance/quantization_spec -template -void Quantize(const float* input, T* output, int num_elements, float scale, - int zero_point) { - for (int i = 0; i < num_elements; i++) { - output[i] = FloatToQuantizedType(input[i], scale, zero_point); - } -} - -template -void SymmetricQuantize(const float* input, T* output, int num_elements, - float scale) { - for (int i = 0; i < num_elements; i++) { - output[i] = FloatToSymmetricQuantizedType(input[i], scale); - } -} - -template -void SymmetricPerChannelQuantize(const float* input, T* output, - int num_elements, int num_channels, - float* scales) { - int elements_per_channel = num_elements / num_channels; - for (int i = 0; i < num_channels; i++) { - for (int j = 0; j < elements_per_channel; j++) { - output[i * elements_per_channel + j] = FloatToSymmetricQuantizedType( - input[i * elements_per_channel + j], scales[i]); - } - } -} - -void SignedSymmetricPerChannelQuantize(const float* values, - TfLiteIntArray* dims, - int quantized_dimension, - int8_t* quantized_values, - float* scaling_factor); - -// Quantizes inputs based on the values provided, choosing the smallest range -// which includes all input values. 
-template -void SymmetricQuantizeCalculateScales(const float* values, TfLiteIntArray* dims, - T* output, float* scale) { - int input_size = ElementCount(*dims); - - float min = 0; - float max = 0; - for (int i = 0; i < input_size; i++) { - min = fminf(min, values[i]); - max = fmaxf(max, values[i]); - } - *scale = fmaxf(std::abs(min), std::abs(max)) / std::numeric_limits::max(); - for (int i = 0; i < input_size; i++) { - const int32_t quantized_value = - static_cast(roundf(values[i] / *scale)); - // Clamp: just in case some odd numeric offset. - quantized_value = fminf(std::numeric_limits::max(), quantized_value); - quantized_value = fmaxf(std::numeric_limits::min() + 1, quantized_value); - output[i] = quantized_value; - } -} - -template -void Dequantize(const T* values, const int size, const float scale, - int zero_point, float* dequantized_values) { - for (int i = 0; i < size; ++i) { - dequantized_values[i] = (values[i] - zero_point) * scale; - } -} - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/mock_micro_graph.cc b/code/components/tflite-lib/tensorflow/lite/micro/mock_micro_graph.cc deleted file mode 100644 index 438a4065..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/mock_micro_graph.cc +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/micro/mock_micro_graph.h" - -#include "tensorflow/lite/micro/test_helpers.h" - -namespace tflite { - -MockMicroGraph::MockMicroGraph(SingleArenaBufferAllocator* allocator) - : MicroGraph(nullptr, nullptr, nullptr, nullptr), - allocator_(allocator), - init_count_(0), - prepare_count_(0), - free_count_(0) { - memset(invoke_counts_, 0, sizeof(invoke_counts_)); - mock_tensor_ = - reinterpret_cast(allocator_->AllocatePersistentBuffer( - sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor))); - int* dims_array = reinterpret_cast( - allocator_->AllocatePersistentBuffer(3 * sizeof(int), alignof(int))); - float* data_array = reinterpret_cast( - allocator_->AllocatePersistentBuffer(2 * sizeof(float), alignof(float))); - int dims[] = {2, 1, 2}; - memcpy(dims_array, dims, 3 * sizeof(int)); - mock_tensor_->dims = testing::IntArrayFromInts(dims_array); - mock_tensor_->data.f = data_array; - mock_tensor_->type = kTfLiteFloat32; -} - -TfLiteStatus MockMicroGraph::InvokeSubgraph(int subgraph_idx) { - invoke_counts_[subgraph_idx]++; - return kTfLiteOk; -} - -TfLiteStatus MockMicroGraph::ResetVariableTensors() { return kTfLiteOk; } - -size_t MockMicroGraph::NumSubgraphInputs(int subgraph_idx) { return 1; } - -TfLiteEvalTensor* MockMicroGraph::GetSubgraphInput(int subgraph_idx, - int tensor_idx) { - return mock_tensor_; -} - -size_t MockMicroGraph::NumSubgraphOutputs(int subgraph_idx) { return 1; } - -TfLiteEvalTensor* MockMicroGraph::GetSubgraphOutput(int subgraph_idx, - int tensor_idx) { - return mock_tensor_; -} - -int MockMicroGraph::NumSubgraphs() { return kMaxSubgraphs; } - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/mock_micro_graph.h b/code/components/tflite-lib/tensorflow/lite/micro/mock_micro_graph.h deleted file mode 100644 index 3ae7d7cf..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/mock_micro_graph.h +++ 
/dev/null @@ -1,60 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_ -#define TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/micro_allocator.h" -#include "tensorflow/lite/micro/micro_graph.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -// MockMicroGraph stubs out all MicroGraph methods used during invoke. A count -// of the number of calls to invoke for each subgraph is maintained for -// validation of control flow operators. 
-class MockMicroGraph : public MicroGraph { - public: - explicit MockMicroGraph(SingleArenaBufferAllocator* allocator); - TfLiteStatus InvokeSubgraph(int subgraph_idx) override; - TfLiteStatus ResetVariableTensors() override; - size_t NumSubgraphInputs(int subgraph_idx) override; - TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int tensor_idx) override; - size_t NumSubgraphOutputs(int subgraph_idx) override; - TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx, - int tensor_idx) override; - int NumSubgraphs() override; - int get_init_count() const { return init_count_; } - int get_prepare_count() const { return prepare_count_; } - int get_free_count() const { return free_count_; } - int get_invoke_count(int subgraph_idx) const { - return invoke_counts_[subgraph_idx]; - } - - private: - static constexpr int kMaxSubgraphs = 10; - SingleArenaBufferAllocator* allocator_; - TfLiteEvalTensor* mock_tensor_; - int init_count_; - int prepare_count_; - int free_count_; - int invoke_counts_[kMaxSubgraphs]; - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/recording_micro_allocator.h b/code/components/tflite-lib/tensorflow/lite/micro/recording_micro_allocator.h deleted file mode 100644 index 699b1a22..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/recording_micro_allocator.h +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_ -#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_ - -#include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h" -#include "tensorflow/lite/micro/compatibility.h" -#include "tensorflow/lite/micro/micro_allocator.h" - -namespace tflite { - -// List of buckets currently recorded by this class. Each type keeps a list of -// allocated information during model initialization. -// TODO(b/169834511): Add tracking for scratch buffer allocations. -enum class RecordedAllocationType { - kTfLiteEvalTensorData, - kPersistentTfLiteTensorData, - kPersistentTfLiteTensorQuantizationData, - kPersistentBufferData, - kTfLiteTensorVariableBufferData, - kNodeAndRegistrationArray, - kOpData, -}; - -// Container for holding information about allocation recordings by a given -// type. Each recording contains the number of bytes requested, the actual bytes -// allocated (can defer from requested by alignment), and the number of items -// allocated. -struct RecordedAllocation { - size_t requested_bytes; - size_t used_bytes; - size_t count; -}; - -// Utility subclass of MicroAllocator that records all allocations -// inside the arena. A summary of allocations can be logged through the -// ErrorReporter by invoking LogAllocations(). This special allocator requires -// an instance of RecordingSingleArenaBufferAllocator to capture allocations in -// the head and tail. Arena allocation recording can be retrieved by type -// through the GetRecordedAllocation() function. This class should only be used -// for auditing memory usage or integration testing. 
-class RecordingMicroAllocator : public MicroAllocator { - public: - static RecordingMicroAllocator* Create(uint8_t* tensor_arena, - size_t arena_size, - ErrorReporter* error_reporter); - - // Returns the fixed amount of memory overhead of RecordingMicroAllocator. - static size_t GetDefaultTailUsage(); - - // Returns the recorded allocations information for a given allocation type. - RecordedAllocation GetRecordedAllocation( - RecordedAllocationType allocation_type) const; - - const RecordingSingleArenaBufferAllocator* GetSimpleMemoryAllocator() const; - - // Logs out through the ErrorReporter all allocation recordings by type - // defined in RecordedAllocationType. - void PrintAllocations() const; - - void* AllocatePersistentBuffer(size_t bytes) override; - - protected: - TfLiteStatus AllocateNodeAndRegistrations( - const Model* model, SubgraphAllocations* subgraph_allocations) override; - TfLiteStatus AllocateTfLiteEvalTensors( - const Model* model, SubgraphAllocations* subgraph_allocations) override; - TfLiteStatus AllocateVariables(const SubGraph* subgraph, - TfLiteEvalTensor* eval_tensors) override; - // TODO(b/162311891): Once all kernels have been updated to the new API drop - // this method. It is only used to record TfLiteTensor persistent allocations. - TfLiteTensor* AllocatePersistentTfLiteTensorInternal() override; - - // TODO(b/162311891): Once all kernels have been updated to the new API drop - // this function since all allocations for quantized data will take place in - // the temp section. 
- TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model, - TfLiteTensor* tensor, - int tensor_index, - int subgraph_index, - bool allocate_temp) override; - - private: - RecordingMicroAllocator(RecordingSingleArenaBufferAllocator* memory_allocator, - MicroMemoryPlanner* memory_planner, - ErrorReporter* error_reporter); - - void PrintRecordedAllocation(RecordedAllocationType allocation_type, - const char* allocation_name, - const char* allocation_description) const; - - RecordedAllocation SnapshotAllocationUsage() const; - void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation, - RecordedAllocation& recorded_allocation); - - const RecordingSingleArenaBufferAllocator* recording_memory_allocator_; - - RecordedAllocation recorded_tflite_eval_tensor_data_ = {}; - RecordedAllocation recorded_persistent_tflite_tensor_data_ = {}; - RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {}; - RecordedAllocation recorded_persistent_buffer_data_ = {}; - RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {}; - RecordedAllocation recorded_node_and_registration_array_data_ = {}; - - // TODO(b/187993291): Re-enable OpData allocating tracking. - RecordedAllocation recorded_op_data_ = {}; - - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/recording_micro_interpreter.h b/code/components/tflite-lib/tensorflow/lite/micro/recording_micro_interpreter.h deleted file mode 100644 index 6d7602be..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/recording_micro_interpreter.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ -#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ - -#include "tensorflow/lite/micro/micro_interpreter.h" -#include "tensorflow/lite/micro/recording_micro_allocator.h" - -namespace tflite { - -// Utility subclass that enables internal recordings of the MicroInterpreter. -// This class should be used to audit and analyze memory arena usage for a given -// model and interpreter. -// -// After construction and the first Invoke() or AllocateTensors() call - the -// memory usage is recorded and available through the GetMicroAllocator() -// function. See RecordingMicroAlloctor for more details on what is currently -// recorded from arena allocations. -// -// It is recommended for users to increase the tensor arena size by at least 1kb -// to ensure enough additional memory is available for internal recordings. 
-class RecordingMicroInterpreter : public MicroInterpreter { - public: - RecordingMicroInterpreter(const Model* model, - const MicroOpResolver& op_resolver, - uint8_t* tensor_arena, size_t tensor_arena_size, - ErrorReporter* error_reporter, - MicroResourceVariables* resource_variable = nullptr, - MicroProfiler* profiler = nullptr) - : MicroInterpreter(model, op_resolver, - RecordingMicroAllocator::Create( - tensor_arena, tensor_arena_size, error_reporter), - error_reporter, resource_variable, profiler), - recording_micro_allocator_( - static_cast(allocator())) {} - - RecordingMicroInterpreter(const Model* model, - const MicroOpResolver& op_resolver, - RecordingMicroAllocator* allocator, - ErrorReporter* error_reporter, - MicroResourceVariables* resource_variable = nullptr, - MicroProfiler* profiler = nullptr) - : MicroInterpreter(model, op_resolver, allocator, error_reporter, - resource_variable, profiler), - recording_micro_allocator_(*allocator) {} - - const RecordingMicroAllocator& GetMicroAllocator() const { - return recording_micro_allocator_; - } - - private: - const RecordingMicroAllocator& recording_micro_allocator_; -}; - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/system_setup.cc b/code/components/tflite-lib/tensorflow/lite/micro/system_setup.cc deleted file mode 100644 index db4a1007..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/system_setup.cc +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/system_setup.h" - -namespace tflite { - -// To add an equivalent function for your own platform, create your own -// implementation file, and place it in a subfolder named after the target. See -// tensorflow/lite/micro/debug_log.cc for a similar example. -void InitializeTarget() {} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/system_setup.h b/code/components/tflite-lib/tensorflow/lite/micro/system_setup.h deleted file mode 100644 index 71ab13a8..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/system_setup.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_MICRO_SYSTEM_SETUP_H_ -#define TENSORFLOW_LITE_MICRO_SYSTEM_SETUP_H_ - -namespace tflite { - -// This should called during initialization of TFLM binaries and tests. 
It can -// be specialized if there is a need for custom target-specific intialization. -// For more information, see tensorflow/lite/micro/system_setup.cc. -void InitializeTarget(); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_SYSTEM_SETUP_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/test_helper_custom_ops.cc b/code/components/tflite-lib/tensorflow/lite/micro/test_helper_custom_ops.cc deleted file mode 100644 index c89483e1..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/test_helper_custom_ops.cc +++ /dev/null @@ -1,113 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/test_helper_custom_ops.h" - -#include -#include -#include -#include -#include - -#include "flatbuffers/flatbuffers.h" // from @flatbuffers -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/micro_utils.h" -#include "tensorflow/lite/schema/schema_generated.h" - -// TODO(b/170464050): Use TFLM test only version of schema_utils. 
- -namespace tflite { -namespace testing { - -const TfLiteRegistration* PackerOp::getRegistration() { - return GetMutableRegistration(); -} - -TfLiteRegistration* PackerOp::GetMutableRegistration() { - static TfLiteRegistration r; - r.init = Init; - r.prepare = Prepare; - r.invoke = Invoke; - r.free = Free; - return &r; -} - -void* PackerOp::Init(TfLiteContext* context, const char* buffer, - size_t length) { - freed_ = false; - // Do nothing. - return nullptr; -} - -void PackerOp::Free(TfLiteContext* context, void* buffer) { freed_ = true; } - -TfLiteStatus PackerOp::Prepare(TfLiteContext* context, TfLiteNode* node) { - return kTfLiteOk; -} - -TfLiteStatus PackerOp::Invoke(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, 0); - TF_LITE_ENSURE(context, input1 != nullptr); - const int32_t* input1_data = input1->data.i32; - TF_LITE_ENSURE_EQ(context, input1->dims->size, 1); - const int32_t input1_len = input1->dims->data[0]; - - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, 1); - TF_LITE_ENSURE(context, input2 != nullptr); - const int32_t* input2_data = input2->data.i32; - TF_LITE_ENSURE_EQ(context, input2->dims->size, 1); - const int32_t input2_len = input2->dims->data[0]; - - TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - TF_LITE_ENSURE(context, output != nullptr); - int32_t* output_data = output->data.i32; - int32_t output_len = output->dims->data[0]; - - // Fill output with input: first with the first tensor, then with the second - // tensor up to the size of the output tensor. 
- int cnt = 0; - int i; - for (i = 0; i < input1_len && cnt < output_len; i++, cnt++) { - output_data[cnt] = input1_data[i]; - } - if (cnt >= output_len) { - return kTfLiteOk; - } - - for (i = 0; i < input2_len && cnt < output_len; i++, cnt++) { - output_data[cnt] = input2_data[i]; - } - if (cnt >= output_len) { - return kTfLiteOk; - } - - for (; cnt < output_len; cnt++) { - output_data[cnt] = 0; - } - return kTfLiteOk; -} - -bool PackerOp::freed_ = false; - -} // namespace testing -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/test_helper_custom_ops.h b/code/components/tflite-lib/tensorflow/lite/micro/test_helper_custom_ops.h deleted file mode 100644 index b8c025a7..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/test_helper_custom_ops.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_ -#define TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_ - -#include -#include - -#include "flatbuffers/flatbuffers.h" // from @flatbuffers -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" -#include "tensorflow/lite/micro/micro_utils.h" -#include "tensorflow/lite/portable_type_to_tflitetype.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { -namespace testing { - -class PackerOp { - public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); - static void* Init(TfLiteContext* context, const char* buffer, size_t length); - static void Free(TfLiteContext* context, void* buffer); - static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); - static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); - - private: - static bool freed_; -}; - -} // namespace testing -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/micro/test_helpers.cc b/code/components/tflite-lib/tensorflow/lite/micro/test_helpers.cc deleted file mode 100644 index 2411bbf8..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/test_helpers.cc +++ /dev/null @@ -1,1914 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/micro/test_helpers.h" - -#include -#include -#include -#include -#include - -#include "flatbuffers/flatbuffers.h" // from @flatbuffers -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/core/api/error_reporter.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" -#include "tensorflow/lite/micro/memory_helpers.h" -#include "tensorflow/lite/micro/micro_arena_constants.h" -#include "tensorflow/lite/micro/micro_utils.h" -#include "tensorflow/lite/micro/test_helper_custom_ops.h" -#include "tensorflow/lite/schema/schema_generated.h" - -// TODO(b/170464050): Use TFLM test only version of schema_utils. - -namespace tflite { -namespace testing { -namespace { - -class StackAllocator : public flatbuffers::Allocator { - public: - StackAllocator(size_t alignment) : data_size_(0) { - data_ = AlignPointerUp(data_backing_, alignment); - } - - uint8_t* allocate(size_t size) override { - TFLITE_DCHECK((data_size_ + size) <= kStackAllocatorSize); - uint8_t* result = data_; - data_ += size; - data_size_ += size; - return result; - } - - void deallocate(uint8_t* p, size_t) override {} - - static StackAllocator& instance(size_t alignment = 1) { - // Avoid using true dynamic memory allocation to be portable to bare metal. 
- static char inst_memory[sizeof(StackAllocator)]; - static StackAllocator* inst = new (inst_memory) StackAllocator(alignment); - return *inst; - } - - static constexpr size_t kStackAllocatorSize = 8192; - - private: - uint8_t data_backing_[kStackAllocatorSize]; - uint8_t* data_; - int data_size_; - - TF_LITE_REMOVE_VIRTUAL_DELETE -}; - -flatbuffers::FlatBufferBuilder* BuilderInstance() { - static char inst_memory[sizeof(flatbuffers::FlatBufferBuilder)]; - static flatbuffers::FlatBufferBuilder* inst = - new (inst_memory) flatbuffers::FlatBufferBuilder( - StackAllocator::kStackAllocatorSize, - &StackAllocator::instance(MicroArenaBufferAlignment())); - return inst; -} - -// A wrapper around FlatBuffer API to help build model easily. -class ModelBuilder { - public: - typedef int32_t Tensor; - typedef int Operator; - typedef int Node; - - // `builder` needs to be available until BuildModel is called. - explicit ModelBuilder(flatbuffers::FlatBufferBuilder* builder) - : builder_(builder) {} - - // Registers an operator that will be used in the model. - Operator RegisterOp(BuiltinOperator op, const char* custom_code); - - // Adds a tensor to the model. - Tensor AddTensor(TensorType type, std::initializer_list shape) { - return AddTensorImpl(type, /* is_variable */ false, shape); - } - - // Adds a variable tensor to the model. - Tensor AddVariableTensor(TensorType type, - std::initializer_list shape) { - return AddTensorImpl(type, /* is_variable */ true, shape); - } - - // Adds a node to the model with given input and output Tensors. - Node AddNode(Operator op, std::initializer_list inputs, - std::initializer_list outputs, - std::initializer_list intermediates = - std::initializer_list{}); - - void AddMetadata(const char* description_string, - const int32_t* metadata_buffer_data, size_t num_elements); - - // Constructs the flatbuffer model using `builder_` and return a pointer to - // it. The returned model has the same lifetime as `builder_`. 
- // Note the default value of 0 for num_subgraph_inputs means all tensor inputs - // are in subgraph input list. - const Model* BuildModel(std::initializer_list inputs, - std::initializer_list outputs, - size_t num_subgraph_inputs = 0); - - private: - // Adds a tensor to the model. - Tensor AddTensorImpl(TensorType type, bool is_variable, - std::initializer_list shape); - - flatbuffers::FlatBufferBuilder* builder_; - - static constexpr int kMaxOperatorCodes = 10; - flatbuffers::Offset operator_codes_[kMaxOperatorCodes]; - int next_operator_code_id_ = 0; - - static constexpr int kMaxOperators = 50; - flatbuffers::Offset operators_[kMaxOperators]; - int next_operator_id_ = 0; - - static constexpr int kMaxTensors = 50; - flatbuffers::Offset tensors_[kMaxTensors]; - - static constexpr int kMaxMetadataBuffers = 10; - - static constexpr int kMaxMetadatas = 10; - flatbuffers::Offset metadata_[kMaxMetadatas]; - - flatbuffers::Offset metadata_buffers_[kMaxMetadataBuffers]; - - int nbr_of_metadata_buffers_ = 0; - - int next_tensor_id_ = 0; -}; - -ModelBuilder::Operator ModelBuilder::RegisterOp(BuiltinOperator op, - const char* custom_code) { - TFLITE_DCHECK(next_operator_code_id_ <= kMaxOperatorCodes); - operator_codes_[next_operator_code_id_] = tflite::CreateOperatorCodeDirect( - *builder_, /*deprecated_builtin_code=*/0, custom_code, /*version=*/0, op); - next_operator_code_id_++; - return next_operator_code_id_ - 1; -} - -ModelBuilder::Node ModelBuilder::AddNode( - ModelBuilder::Operator op, - std::initializer_list inputs, - std::initializer_list outputs, - std::initializer_list intermediates) { - TFLITE_DCHECK(next_operator_id_ <= kMaxOperators); - operators_[next_operator_id_] = tflite::CreateOperator( - *builder_, op, builder_->CreateVector(inputs.begin(), inputs.size()), - builder_->CreateVector(outputs.begin(), outputs.size()), - BuiltinOptions_NONE, - /*builtin_options=*/0, - /*custom_options=*/0, tflite::CustomOptionsFormat_FLEXBUFFERS, - /*mutating_variable_inputs 
=*/0, - builder_->CreateVector(intermediates.begin(), intermediates.size())); - next_operator_id_++; - return next_operator_id_ - 1; -} - -void ModelBuilder::AddMetadata(const char* description_string, - const int32_t* metadata_buffer_data, - size_t num_elements) { - metadata_[ModelBuilder::nbr_of_metadata_buffers_] = - CreateMetadata(*builder_, builder_->CreateString(description_string), - 1 + ModelBuilder::nbr_of_metadata_buffers_); - - metadata_buffers_[nbr_of_metadata_buffers_] = tflite::CreateBuffer( - *builder_, builder_->CreateVector((uint8_t*)metadata_buffer_data, - sizeof(uint32_t) * num_elements)); - - ModelBuilder::nbr_of_metadata_buffers_++; -} - -const Model* ModelBuilder::BuildModel( - std::initializer_list inputs, - std::initializer_list outputs, - size_t num_subgraph_inputs) { - // Model schema requires an empty buffer at idx 0. - size_t buffer_size = 1 + ModelBuilder::nbr_of_metadata_buffers_; - flatbuffers::Offset buffers[kMaxMetadataBuffers]; - buffers[0] = tflite::CreateBuffer(*builder_); - - // Place the metadata buffers first in the buffer since the indices for them - // have already been set in AddMetadata() - for (int i = 1; i < ModelBuilder::nbr_of_metadata_buffers_ + 1; ++i) { - buffers[i] = metadata_buffers_[i - 1]; - } - - // Default to single subgraph model. - constexpr size_t subgraphs_size = 1; - - // Find out number of subgraph inputs. - if (num_subgraph_inputs == 0) { - // This is the default case. - num_subgraph_inputs = inputs.size(); - } else { - // A non-zero value of num_subgraph_inputs means that some of - // the operator input tensors are not subgraph inputs. 
- TFLITE_DCHECK(num_subgraph_inputs <= inputs.size()); - } - - const flatbuffers::Offset subgraphs[subgraphs_size] = { - tflite::CreateSubGraph( - *builder_, builder_->CreateVector(tensors_, next_tensor_id_), - builder_->CreateVector(inputs.begin(), num_subgraph_inputs), - builder_->CreateVector(outputs.begin(), outputs.size()), - builder_->CreateVector(operators_, next_operator_id_), - builder_->CreateString("test_subgraph"))}; - - flatbuffers::Offset model_offset; - if (ModelBuilder::nbr_of_metadata_buffers_ > 0) { - model_offset = tflite::CreateModel( - *builder_, 0, - builder_->CreateVector(operator_codes_, next_operator_code_id_), - builder_->CreateVector(subgraphs, subgraphs_size), - builder_->CreateString("teset_model"), - builder_->CreateVector(buffers, buffer_size), 0, - builder_->CreateVector(metadata_, - ModelBuilder::nbr_of_metadata_buffers_)); - } else { - model_offset = tflite::CreateModel( - *builder_, 0, - builder_->CreateVector(operator_codes_, next_operator_code_id_), - builder_->CreateVector(subgraphs, subgraphs_size), - builder_->CreateString("teset_model"), - builder_->CreateVector(buffers, buffer_size)); - } - - tflite::FinishModelBuffer(*builder_, model_offset); - void* model_pointer = builder_->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -ModelBuilder::Tensor ModelBuilder::AddTensorImpl( - TensorType type, bool is_variable, std::initializer_list shape) { - TFLITE_DCHECK(next_tensor_id_ <= kMaxTensors); - tensors_[next_tensor_id_] = tflite::CreateTensor( - *builder_, builder_->CreateVector(shape.begin(), shape.size()), type, - /* buffer */ 0, /* name */ 0, /* quantization */ 0, - /* is_variable */ is_variable, - /* sparsity */ 0); - next_tensor_id_++; - return next_tensor_id_ - 1; -} - -const Model* BuildSimpleStatefulModel() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance(); - - ModelBuilder model_builder(fb_builder); - - const int op_id = 
- model_builder.RegisterOp(BuiltinOperator_CUSTOM, "simple_stateful_op"); - const int input_tensor = model_builder.AddTensor(TensorType_INT8, {3}); - const int median_tensor = model_builder.AddTensor(TensorType_INT8, {3}); - const int invoke_count_tensor = - model_builder.AddTensor(TensorType_INT32, {1}); - const int intermediate_tensor = - model_builder.AddTensor(TensorType_FLOAT32, {0}); - - model_builder.AddNode(op_id, {input_tensor}, - {median_tensor, invoke_count_tensor}, - {intermediate_tensor}); - return model_builder.BuildModel({input_tensor}, - {median_tensor, invoke_count_tensor}); -} - -const Model* BuildSimpleModelWithBranch() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance(); - - ModelBuilder model_builder(fb_builder); - /* Model structure - | t0 - +------| - | v - | +---------+ - | | n0 | - | | | - | +---------+ - v + - | - +---------+ | t1 - | n1 | | - | | | - +---------+ | - | | - t2 | v - | +---------+ - +-->| n2 | - | | - +-------|-+ - |t3 - v - */ - const int op_id = - model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom"); - const int t0 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); - const int t1 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); - const int t2 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); - const int t3 = model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); - model_builder.AddNode(op_id, {t0}, {t1}); // n0 - model_builder.AddNode(op_id, {t0}, {t2}); // n1 - model_builder.AddNode(op_id, {t1, t2}, {t3}); // n2 - return model_builder.BuildModel({t0}, {t3}); -} - -const Model* BuildModelWithOfflinePlanning(int number_of_tensors, - const int32_t* metadata_buffer, - NodeConnection* node_conn, - int num_conns, - int num_subgraph_inputs) { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance(); - - ModelBuilder model_builder(fb_builder); - - const int op_id = - model_builder.RegisterOp(BuiltinOperator_CUSTOM, 
"mock_custom"); - - for (int i = 0; i < number_of_tensors; ++i) { - model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3}); - } - - for (int i = 0; i < num_conns; ++i) { - model_builder.AddNode(op_id, node_conn[i].input, node_conn[i].output); - } - - model_builder.AddMetadata( - "OfflineMemoryAllocation", metadata_buffer, - number_of_tensors + tflite::testing::kOfflinePlannerHeaderSize); - - return model_builder.BuildModel( - node_conn[0].input, node_conn[num_conns - 1].output, num_subgraph_inputs); -} - -const Model* BuildModelWithUnusedInputs() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr size_t buffers_size = 1; - const Offset buffers[buffers_size] = {CreateBuffer(*builder)}; - constexpr size_t tensor_shape_size = 2; - const int32_t tensor_shape[tensor_shape_size] = {1, 64}; - constexpr size_t tensors_size = 4; - const Offset tensors[tensors_size] = { - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 0, - builder->CreateString("test_input_tensor"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 0, - builder->CreateString("test_unused_input_tensor"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 0, - builder->CreateString("test_output_tensor"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 0, - builder->CreateString("test_unused_tensor"), 0, false), - }; - constexpr size_t inputs_size = 2; - const int32_t inputs[inputs_size] = {0, 1}; - constexpr size_t outputs_size = 1; - const int32_t outputs[outputs_size] = {2}; - constexpr size_t operator_inputs_size = 1; - const int32_t operator_inputs[operator_inputs_size] = {0}; - constexpr size_t operator_outputs_size = 1; - const int32_t operator_outputs[operator_outputs_size] = {2}; - constexpr size_t 
operators_size = 1; - const Offset operators[operators_size] = { - CreateOperator( - *builder, 0, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, operator_outputs_size), - BuiltinOptions_NONE), - }; - constexpr size_t subgraphs_size = 1; - const Offset subgraphs[subgraphs_size] = { - CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), - builder->CreateVector(inputs, inputs_size), - builder->CreateVector(outputs, outputs_size), - builder->CreateVector(operators, operators_size), - builder->CreateString("test_subgraph"))}; - constexpr size_t operator_codes_size = 1; - const Offset operator_codes[operator_codes_size] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "mock_custom", - /*version=*/0, BuiltinOperator_CUSTOM)}; - const Offset model_offset = CreateModel( - *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), - builder->CreateVector(subgraphs, subgraphs_size), - builder->CreateString("test_model"), - builder->CreateVector(buffers, buffers_size)); - FinishModelBuffer(*builder, model_offset); - void* model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -const Model* BuildModelWithUnusedOperatorOutputs() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr size_t buffers_size = 1; - const Offset buffers[buffers_size] = {CreateBuffer(*builder)}; - constexpr size_t tensor_shape_size = 2; - const int32_t tensor_shape[tensor_shape_size] = {1, 64}; - constexpr size_t tensors_size = 2; - const Offset tensors[tensors_size] = { - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 0, - builder->CreateString("test_input_tensor"), 0, false), - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 0, - 
builder->CreateString("test_unused_output_tensor"), 0, false)}; - constexpr size_t inputs_size = 0; - const int32_t inputs[inputs_size] = {}; - constexpr size_t outputs_size = 1; - const int32_t outputs[outputs_size] = {0}; - constexpr size_t operator_inputs_size = 0; - const int32_t operator_inputs[operator_inputs_size] = {}; - constexpr size_t operator_outputs_size = 2; - const int32_t operator_outputs[operator_outputs_size] = {0, 1}; - constexpr size_t operators_size = 1; - const Offset operators[operators_size] = { - CreateOperator( - *builder, 0, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, operator_outputs_size), - BuiltinOptions_NONE), - }; - constexpr size_t subgraphs_size = 1; - const Offset subgraphs[subgraphs_size] = { - CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), - builder->CreateVector(inputs, inputs_size), - builder->CreateVector(outputs, outputs_size), - builder->CreateVector(operators, operators_size), - builder->CreateString("test_subgraph"))}; - constexpr size_t operator_codes_size = 1; - const Offset operator_codes[operator_codes_size] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "mock_custom", - /*version=*/0, BuiltinOperator_CUSTOM)}; - const Offset model_offset = CreateModel( - *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), - builder->CreateVector(subgraphs, subgraphs_size), - builder->CreateString("test_model"), - builder->CreateVector(buffers, buffers_size)); - FinishModelBuffer(*builder, model_offset); - void* model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -const Model* BuildModelWith256x256Tensor() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance(); - - ModelBuilder model_builder(fb_builder); - - const int op_id = - model_builder.RegisterOp(BuiltinOperator_CUSTOM, 
"mock_custom"); - const int input1_tensor = - model_builder.AddTensor(TensorType_INT8, {256, 256}); - const int input2_tensor = - model_builder.AddTensor(TensorType_INT8, {256, 256}); - const int output_tensor = - model_builder.AddTensor(TensorType_INT8, {256, 256}); - - model_builder.AddNode(op_id, {input1_tensor, input2_tensor}, {output_tensor}); - return model_builder.BuildModel({input1_tensor, input2_tensor}, - {output_tensor}); -} - -const Model* BuildSimpleMockModel() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr size_t buffer_data_size = 1; - const uint8_t buffer_data[buffer_data_size] = {21}; - constexpr size_t buffers_size = 2; - const Offset buffers[buffers_size] = { - CreateBuffer(*builder), - CreateBuffer(*builder, - builder->CreateVector(buffer_data, buffer_data_size))}; - constexpr size_t tensor_shape_size = 1; - const int32_t tensor_shape[tensor_shape_size] = {1}; - constexpr size_t tensors_size = 4; - const Offset tensors[tensors_size] = { - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, - builder->CreateString("test_input_tensor"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 1, - builder->CreateString("test_weight_tensor"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, - builder->CreateString("test_output_tensor"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, - builder->CreateString("test_output2_tensor"), 0, false), - }; - constexpr size_t inputs_size = 1; - const int32_t inputs[inputs_size] = {0}; - constexpr size_t outputs_size = 2; - const int32_t outputs[outputs_size] = {2, 3}; - constexpr size_t operator_inputs_size = 2; - const int32_t operator_inputs[operator_inputs_size] = {0, 1}; - constexpr size_t 
operator_outputs_size = 1; - const int32_t operator_outputs[operator_outputs_size] = {2}; - const int32_t operator2_outputs[operator_outputs_size] = {3}; - constexpr size_t operators_size = 2; - const Offset operators[operators_size] = { - CreateOperator( - *builder, 0, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, operator_outputs_size), - BuiltinOptions_NONE), - CreateOperator( - *builder, 0, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator2_outputs, operator_outputs_size), - BuiltinOptions_NONE), - }; - constexpr size_t subgraphs_size = 1; - const Offset subgraphs[subgraphs_size] = { - CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), - builder->CreateVector(inputs, inputs_size), - builder->CreateVector(outputs, outputs_size), - builder->CreateVector(operators, operators_size), - builder->CreateString("test_subgraph"))}; - constexpr size_t operator_codes_size = 1; - const Offset operator_codes[operator_codes_size] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "mock_custom", - /*version=*/0, BuiltinOperator_CUSTOM)}; - const Offset model_offset = CreateModel( - *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), - builder->CreateVector(subgraphs, subgraphs_size), - builder->CreateString("test_model"), - builder->CreateVector(buffers, buffers_size)); - FinishModelBuffer(*builder, model_offset); - void* model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -const Model* BuildComplexMockModel() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr size_t buffer_data_size = 1; - const uint8_t buffer_data_1[buffer_data_size] = {21}; - const uint8_t buffer_data_2[buffer_data_size] = {21}; - const uint8_t buffer_data_3[buffer_data_size] = {21}; - constexpr size_t 
buffers_size = 7; - const Offset buffers[buffers_size] = { - // Op 1 buffers: - CreateBuffer(*builder), - CreateBuffer(*builder), - CreateBuffer(*builder, - builder->CreateVector(buffer_data_1, buffer_data_size)), - // Op 2 buffers: - CreateBuffer(*builder), - CreateBuffer(*builder, - builder->CreateVector(buffer_data_2, buffer_data_size)), - // Op 3 buffers: - CreateBuffer(*builder), - CreateBuffer(*builder, - builder->CreateVector(buffer_data_3, buffer_data_size)), - }; - constexpr size_t tensor_shape_size = 1; - const int32_t tensor_shape[tensor_shape_size] = {1}; - - constexpr size_t tensors_size = 10; - const Offset tensors[tensors_size] = { - // Op 1 inputs: - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, builder->CreateString("test_input_tensor_1"), 0, - false /* is_variable */), - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 1, builder->CreateString("test_variable_tensor_1"), - 0, true /* is_variable */), - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 2, builder->CreateString("test_weight_tensor_1"), 0, - false /* is_variable */), - // Op 1 output / Op 2 input: - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, builder->CreateString("test_output_tensor_1"), 0, - false /* is_variable */), - // Op 2 inputs: - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 1, builder->CreateString("test_variable_tensor_2"), - 0, true /* is_variable */), - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 2, builder->CreateString("test_weight_tensor_2"), 0, - false /* is_variable */), - // Op 2 output / Op 3 input: - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, 
builder->CreateString("test_output_tensor_2"), 0, - false /* is_variable */), - // Op 3 inputs: - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 1, builder->CreateString("test_variable_tensor_3"), - 0, true /* is_variable */), - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT8, 2, builder->CreateString("test_weight_tensor_3"), 0, - false /* is_variable */), - // Op 3 output: - CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, builder->CreateString("test_output_tensor_3"), 0, - false /* is_variable */), - }; - - constexpr size_t operators_size = 3; - Offset operators[operators_size]; - { - // Set Op 1 attributes: - constexpr size_t operator_inputs_size = 3; - const int32_t operator_inputs[operator_inputs_size] = {0, 1, 2}; - constexpr size_t operator_outputs_size = 1; - const int32_t operator_outputs[operator_outputs_size] = {3}; - - operators[0] = {CreateOperator( - *builder, 0, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, operator_outputs_size), - BuiltinOptions_NONE)}; - } - - { - // Set Op 2 attributes - constexpr size_t operator_inputs_size = 3; - const int32_t operator_inputs[operator_inputs_size] = {3, 4, 5}; - constexpr size_t operator_outputs_size = 1; - const int32_t operator_outputs[operator_outputs_size] = {6}; - - operators[1] = {CreateOperator( - *builder, 0, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, operator_outputs_size), - BuiltinOptions_NONE)}; - } - - { - // Set Op 3 attributes - constexpr size_t operator_inputs_size = 3; - const int32_t operator_inputs[operator_inputs_size] = {6, 7, 8}; - constexpr size_t operator_outputs_size = 1; - const int32_t operator_outputs[operator_outputs_size] = {9}; - - operators[2] = {CreateOperator( - *builder, 0, - 
builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, operator_outputs_size), - BuiltinOptions_NONE)}; - } - - constexpr size_t inputs_size = 1; - const int32_t inputs[inputs_size] = {0}; - constexpr size_t outputs_size = 1; - const int32_t outputs[outputs_size] = {9}; - - constexpr size_t subgraphs_size = 1; - const Offset subgraphs[subgraphs_size] = { - CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), - builder->CreateVector(inputs, inputs_size), - builder->CreateVector(outputs, outputs_size), - builder->CreateVector(operators, operators_size), - builder->CreateString("test_subgraph"))}; - - constexpr size_t operator_codes_size = 1; - const Offset operator_codes[operator_codes_size] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "mock_custom", - /*version=*/0, BuiltinOperator_CUSTOM)}; - - const Offset model_offset = CreateModel( - *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), - builder->CreateVector(subgraphs, subgraphs_size), - builder->CreateString("test_model"), - builder->CreateVector(buffers, buffers_size)); - - FinishModelBuffer(*builder, model_offset); - void* model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -const Model* BuildSimpleMultipleInputsModel() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr size_t buffers_size = 1; - const Offset buffers[buffers_size] = { - CreateBuffer(*builder), - }; - constexpr size_t tensor_shape_size = 1; - const int32_t tensor_shape[tensor_shape_size] = {1}; - constexpr size_t tensors_size = 4; - const Offset tensors[tensors_size] = { - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, - builder->CreateString("test_input_tensor1"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, 
tensor_shape_size), - TensorType_INT8, 0, - builder->CreateString("test_input_tensor2"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, - builder->CreateString("test_input_tensor3"), 0, false), - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, - builder->CreateString("test_output_tensor"), 0, false), - }; - constexpr size_t inputs_size = 3; - const int32_t inputs[inputs_size] = {0, 1, 2}; - constexpr size_t outputs_size = 1; - const int32_t outputs[outputs_size] = {3}; - constexpr size_t operator_inputs_size = 3; - const int32_t operator_inputs[operator_inputs_size] = {0, 1, 2}; - constexpr size_t operator_outputs_size = 1; - const int32_t operator_outputs[operator_outputs_size] = {3}; - constexpr size_t operators_size = 1; - const Offset operators[operators_size] = { - CreateOperator( - *builder, 0, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, operator_outputs_size), - BuiltinOptions_NONE), - }; - constexpr size_t subgraphs_size = 1; - const Offset subgraphs[subgraphs_size] = { - CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), - builder->CreateVector(inputs, inputs_size), - builder->CreateVector(outputs, outputs_size), - builder->CreateVector(operators, operators_size), - builder->CreateString("test_subgraph"))}; - constexpr size_t operator_codes_size = 1; - const Offset operator_codes[operator_codes_size] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "multiple_inputs_op", - /*version=*/0, BuiltinOperator_CUSTOM)}; - const Offset model_offset = CreateModel( - *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), - builder->CreateVector(subgraphs, subgraphs_size), - builder->CreateString("test_model"), - builder->CreateVector(buffers, buffers_size)); - FinishModelBuffer(*builder, model_offset); - void* 
model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -const Model* BuildSimpleModelWithSubgraphsAndIf() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr size_t buffers_size = 1; - const Offset buffers[buffers_size] = { - CreateBuffer(*builder), - }; - const int32_t condition_tensor_shape[] = {1}; - const int32_t data_tensor_shape[] = {1, 2}; - constexpr size_t tensors_size = 4; - const Offset subgraph1_tensors[tensors_size] = { - CreateTensor(*builder, builder->CreateVector(condition_tensor_shape, 1), - TensorType_BOOL, 0, - builder->CreateString("condition tensor"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor1"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor2"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - builder->CreateString("output_tensor"), 0, false), - }; - const Offset subgraph2_tensors[tensors_size] = { - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor1"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor2"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - builder->CreateString("output_tensor"), 0, false), - }; - const Offset subgraph3_tensors[tensors_size] = { - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor1"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - 
builder->CreateString("input_tensor2"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2), - TensorType_FLOAT32, 0, - builder->CreateString("output_tensor"), 0, false), - }; - - constexpr size_t if_inputs_size = 3; - const int32_t if_inputs[if_inputs_size] = {0, 1, 2}; - constexpr size_t outputs_size = 1; - const int32_t if_outputs[outputs_size] = {3}; - constexpr size_t operator_inputs_size = 2; - const int32_t operator_inputs[operator_inputs_size] = {0, 1}; - const int32_t operator_outputs[outputs_size] = {2}; - constexpr size_t operators_size = 1; - const Offset subgraph1_operators[operators_size] = { - CreateOperator( - *builder, 0, builder->CreateVector(if_inputs, if_inputs_size), - builder->CreateVector(if_outputs, outputs_size), - BuiltinOptions_IfOptions, CreateIfOptions(*builder, 1, 2).Union()), - }; - const Offset subgraph2_operators[operators_size] = { - CreateOperator( - *builder, 1, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, outputs_size), - BuiltinOptions_NONE), - }; - const Offset subgraph3_operators[operators_size] = { - CreateOperator( - *builder, 2, - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, outputs_size), - BuiltinOptions_NONE), - }; - constexpr size_t subgraphs_size = 3; - const Offset subgraphs[subgraphs_size] = { - CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 4), - builder->CreateVector(if_inputs, if_inputs_size), - builder->CreateVector(if_outputs, outputs_size), - builder->CreateVector(subgraph1_operators, operators_size), - builder->CreateString("if_subgraph")), - CreateSubGraph( - *builder, builder->CreateVector(subgraph2_tensors, 3), - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, outputs_size), - builder->CreateVector(subgraph2_operators, operators_size), - builder->CreateString("then_subgraph")), - 
CreateSubGraph( - *builder, builder->CreateVector(subgraph3_tensors, 3), - builder->CreateVector(operator_inputs, operator_inputs_size), - builder->CreateVector(operator_outputs, outputs_size), - builder->CreateVector(subgraph3_operators, operators_size), - builder->CreateString("else_subgraph")), - }; - constexpr size_t operator_codes_size = 3; - const Offset operator_codes[operator_codes_size] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "multiple_inputs_op", - /*version=*/0, BuiltinOperator_IF), - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "multiple_inputs_op", - /*version=*/0, BuiltinOperator_ADD), - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "multiple_inputs_op", - /*version=*/0, BuiltinOperator_MUL), - }; - const Offset model_offset = CreateModel( - *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), - builder->CreateVector(subgraphs, subgraphs_size), - builder->CreateString("test_model"), - builder->CreateVector(buffers, buffers_size)); - FinishModelBuffer(*builder, model_offset); - void* model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -const Model* BuildSimpleModelWithSubgraphsAndWhile() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr size_t buffers_size = 1; - const Offset buffers[buffers_size] = { - CreateBuffer(*builder), - }; - const int32_t data_tensor_shape[] = {1, 1}; - constexpr size_t while_tensors_size = 4; - constexpr size_t op_tensors_size = 3; - const Offset subgraph0_tensors[while_tensors_size] = { - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor0"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor1"), 0, false), - 
CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("output_tensor0"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("output_tensor1"), 0, false), - }; - const Offset subgraph1_tensors[op_tensors_size] = { - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor1"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor2"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_BOOL, 0, - builder->CreateString("condition_tensor"), 0, false), - }; - const Offset subgraph2_tensors[op_tensors_size] = { - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor0"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("input_tensor1"), 0, false), - CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1), - TensorType_FLOAT32, 0, - builder->CreateString("output_tensor0"), 0, false), - }; - - constexpr size_t inputs_size = 2; - const int32_t inputs[inputs_size] = {0, 1}; - constexpr size_t while_outputs_size = 2; - const int32_t while_outputs[while_outputs_size] = {2, 3}; - constexpr size_t cond_outputs_size = 1; - const int32_t cond_outputs[cond_outputs_size] = {2}; - constexpr size_t add_outputs_size = 1; - const int32_t add_outputs[add_outputs_size] = {2}; - constexpr size_t add_subgraph_outputs_size = 2; - const int32_t add_subgraph_outputs[add_subgraph_outputs_size] = {2, 1}; - constexpr size_t operators_size = 1; - const Offset subgraph0_operators[operators_size] = { - CreateOperator(*builder, 0, builder->CreateVector(inputs, inputs_size), - 
builder->CreateVector(while_outputs, while_outputs_size), - BuiltinOptions_WhileOptions, - CreateWhileOptions(*builder, 1, 2).Union()), - }; - const Offset subgraph1_operators[operators_size] = { - CreateOperator(*builder, 1, builder->CreateVector(inputs, inputs_size), - builder->CreateVector(cond_outputs, cond_outputs_size), - BuiltinOptions_NONE), - }; - const Offset subgraph2_operators[operators_size] = { - CreateOperator(*builder, 2, builder->CreateVector(inputs, inputs_size), - builder->CreateVector(add_outputs, add_outputs_size), - BuiltinOptions_NONE), - }; - constexpr size_t subgraphs_size = 3; - const Offset subgraphs[subgraphs_size] = { - CreateSubGraph(*builder, builder->CreateVector(subgraph0_tensors, 4), - builder->CreateVector(inputs, inputs_size), - builder->CreateVector(while_outputs, while_outputs_size), - builder->CreateVector(subgraph0_operators, operators_size), - builder->CreateString("while_subgraph")), - CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 3), - builder->CreateVector(inputs, inputs_size), - builder->CreateVector(cond_outputs, cond_outputs_size), - builder->CreateVector(subgraph1_operators, operators_size), - builder->CreateString("cond_subgraph")), - CreateSubGraph(*builder, builder->CreateVector(subgraph2_tensors, 3), - builder->CreateVector(inputs, inputs_size), - builder->CreateVector(add_subgraph_outputs, - add_subgraph_outputs_size), - builder->CreateVector(subgraph2_operators, operators_size), - builder->CreateString("body_subgraph")), - }; - constexpr size_t operator_codes_size = 3; - const Offset operator_codes[operator_codes_size] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "multiple_inputs_op", - /*version=*/0, BuiltinOperator_WHILE), - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "multiple_inputs_op", - /*version=*/0, BuiltinOperator_LESS), - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "multiple_inputs_op", - /*version=*/0, 
BuiltinOperator_ADD), - }; - const Offset model_offset = CreateModel( - *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), - builder->CreateVector(subgraphs, subgraphs_size), - builder->CreateString("test_model"), - builder->CreateVector(buffers, buffers_size)); - FinishModelBuffer(*builder, model_offset); - void* model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -// Build a model with If and two subgraphs: two data tensors A1 of size 2, A2 of -// size 4 are first concatenated, then cut to a new tensor A3 of size 3; the new -// tensor A3 of size 3 is then concatenated with A2 tensor of size 4 to produce -// a final output tensor A4. This model is specially crafted to capture the -// corner case outlined in go/avoid-memory-corruption-in-if-operator. -// -// Subgraph0 -// A0(1) A2_0(4) A1_0(2) -// | | | ---+ -// v v v | -// +--------------+ | -// | IF | | -// +------+-------+ | -// | A3_0(3) | -// v | -// +--------------+ | -// | CUSTOM |<---+ -// +------+-------+ -// | -// v -// A4_0(8) -// -// Subgraph1/2 -// A1_1(2) A2_1(4) -// | | -// v v -// +---------------+ -// | CUSTOM | -// +-------+-------+ -// | -// v A3_1(3) -// -// And it leads to memory plan as below -// -// Subgraph0 Layout -// -// -// <------------A4_0 -------------> <----- A2_0-------> <----A3_0 ---> -// +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+ -// | | | | | | | | | 3 | 4 | 5 | 6 | | | | -// +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+ -// -// +----+----+----+ -// | 1 | 2 | A0 | -// +----+----+----+ -// <---A1_0--> -// -// Subgraph 1 Layout -// -// +----+----+----+----+----+----+----+----+----+ -// | | | | | | | | | | -// +----+----+----+----+----+----+----+----+----+ -// -// -// <------A2_1 -------><----A3_1 ---><--A1_1---> -// -// -// A1_1 of subgraph 1 will overlap with A2_0 of subgraph 0. 
-// In a buggy implementation of IF, two overwrite may happen: -// 1. copying input from A1_0 to A1_1 overwrites A2_0 before A2_0 is copied to -// A2_1; thus subgraph 1 produce incorrect output. -// 2. copying output from A3_1 to A4_0 overwrites A1_0, which should remain -// intact so that it can be used by the OP after the IF operator in subgraph 0 -// - -const Model* BuildModelWithIfAndSubgraphInputTensorOverlap() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr TensorType kTensorType = TensorType_INT32; - constexpr int kBlockSize = - tflite::MicroArenaBufferAlignment() / sizeof(int32_t); - constexpr size_t kBuffersCount = 1; - const Offset buffers[kBuffersCount] = { - CreateBuffer(*builder), - }; - const int32_t kConditionTensorShape[] = {1}; - const int32_t kIfInput1TensorShape[] = {2 * kBlockSize}; - const int32_t kIfInput2TensorShape[] = {4 * kBlockSize}; - const int32_t kIfOutputTensorShape[] = {3 * kBlockSize}; - const int32_t kFinalOutputTensorShape[] = {8 * kBlockSize}; - constexpr size_t kSubgraph0TensorsCount = 5; - const Offset kSubgraph0Tensors[kSubgraph0TensorsCount] = { - CreateTensor(*builder, builder->CreateVector(kConditionTensorShape, 1), - TensorType_BOOL, 0, - builder->CreateString("condition tensor"), 0, false), - CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1), - kTensorType, 0, builder->CreateString("if_input_tensor1"), 0, - false), - CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1), - kTensorType, 0, builder->CreateString("if_input_tensor2"), 0, - false), - CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1), - kTensorType, 0, builder->CreateString("if_output_tensor"), 0, - false), - CreateTensor(*builder, builder->CreateVector(kFinalOutputTensorShape, 1), - kTensorType, 0, builder->CreateString("final_output_tensor"), - 0, false), - }; - - // Subgraph 1 is the chosen path if condition tensor in IF is true. 
- constexpr size_t kSubgraph1TensorsCount = 3; - const Offset kSubgraph1Tensors[kSubgraph1TensorsCount] = { - CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1), - kTensorType, 0, - builder->CreateString("subgraph1_input_tensor1"), 0, false), - CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1), - kTensorType, 0, - builder->CreateString("subgraph1_input_tensor2"), 0, false), - CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1), - kTensorType, 0, - builder->CreateString("subgraph1_output_tensor"), 0, false), - }; - - // Subgraph 2 is the chosen path if condition tensor in IF is false - constexpr size_t kSubgraph2TensorsCount = 3; - const Offset kSubgraph2Tensors[kSubgraph2TensorsCount] = { - CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1), - kTensorType, 0, builder->CreateString("if_input_tensor1"), 0, - false), - CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1), - kTensorType, 0, builder->CreateString("if_input_tensor2"), 0, - false), - CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1), - kTensorType, 0, builder->CreateString("if_output_tensor"), 0, - false), - }; - - constexpr int kIfOpCodeIndex = 0; - constexpr int kCustomOpCodeIndex = 1; - - constexpr size_t kIfInputsCount = 3; - const int32_t kIfInputs[kIfInputsCount] = {0, 1, 2}; - constexpr size_t kOutputsCount = 1; - const int32_t kIfOutputs[kOutputsCount] = {3}; - constexpr size_t kOpAfterIfInputsCount = 2; - const int32_t kOpAfterIfInputs[kOpAfterIfInputsCount] = {3, 2}; - const int32_t kOpAfterIfOutputs[kOutputsCount] = {4}; - constexpr size_t kOperatorsCount = 2; - const Offset kSubgraph0Operators[kOperatorsCount] = { - CreateOperator(*builder, kIfOpCodeIndex, - builder->CreateVector(kIfInputs, kIfInputsCount), - builder->CreateVector(kIfOutputs, kOutputsCount), - BuiltinOptions_IfOptions, - CreateIfOptions(*builder, 1, 2).Union()), - CreateOperator( - *builder, kCustomOpCodeIndex, - 
builder->CreateVector(kOpAfterIfInputs, kOpAfterIfInputsCount), - builder->CreateVector(kOpAfterIfOutputs, kOutputsCount)), - }; - - constexpr size_t kSubgraph1InputsCount = 2; - const int32_t kSubgraph1Inputs[kSubgraph1InputsCount] = {0, 1}; - constexpr size_t kSubgraph1OutputsCount = 1; - const int32_t kSubgraph1Outputs[kSubgraph1OutputsCount] = {2}; - constexpr size_t kSubgraph1OperatorsCount = 1; - const Offset kSubgraph1Operators[kSubgraph1OperatorsCount] = { - CreateOperator( - *builder, kCustomOpCodeIndex, - builder->CreateVector(kSubgraph1Inputs, kSubgraph1InputsCount), - builder->CreateVector(kSubgraph1Outputs, kSubgraph1OutputsCount), - BuiltinOptions_NONE), - }; - - constexpr size_t kSubgraph2InputsCount = 2; - const int32_t kSubgraph2Inputs[kSubgraph2InputsCount] = {0, 1}; - constexpr size_t kSubgraph2OutputsCount = 1; - const int32_t kSubgraph2Outputs[kSubgraph2OutputsCount] = {2}; - constexpr size_t kSubgraph2OperatorsCount = 1; - const Offset kSubgraph2Operators[kSubgraph2OperatorsCount] = { - CreateOperator( - *builder, kCustomOpCodeIndex, - builder->CreateVector(kSubgraph2Inputs, kSubgraph2InputsCount), - builder->CreateVector(kSubgraph2Outputs, kSubgraph2OutputsCount), - BuiltinOptions_NONE), - }; - - constexpr size_t kSubgraphsCount = 3; - const Offset kSubgraphs[kSubgraphsCount] = { - CreateSubGraph( - *builder, - builder->CreateVector(kSubgraph0Tensors, kSubgraph0TensorsCount), - builder->CreateVector(kIfInputs, kIfInputsCount), - builder->CreateVector(kOpAfterIfOutputs, kOutputsCount), - builder->CreateVector(kSubgraph0Operators, kOperatorsCount), - builder->CreateString("if_subgraph")), - CreateSubGraph( - *builder, - builder->CreateVector(kSubgraph1Tensors, kSubgraph1TensorsCount), - builder->CreateVector(kSubgraph1Inputs, kSubgraph1InputsCount), - builder->CreateVector(kSubgraph1Outputs, kSubgraph1OutputsCount), - builder->CreateVector(kSubgraph1Operators, kSubgraph1OperatorsCount), - builder->CreateString("then_subgraph")), - 
CreateSubGraph( - *builder, - builder->CreateVector(kSubgraph2Tensors, kSubgraph2TensorsCount), - builder->CreateVector(kSubgraph2Inputs, kSubgraph2InputsCount), - builder->CreateVector(kSubgraph2Outputs, kSubgraph2OutputsCount), - builder->CreateVector(kSubgraph2Operators, kSubgraph2OperatorsCount), - builder->CreateString("else_subgraph")), - }; - - constexpr size_t kOperatorCodesCount = 2; - const Offset kOperatorCodes[kOperatorCodesCount] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, "if", - /*version=*/0, BuiltinOperator_IF), - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "custom_packer_op", - /*version=*/0, BuiltinOperator_CUSTOM), - }; - const Offset kModelOffset = CreateModel( - *builder, 0, builder->CreateVector(kOperatorCodes, kOperatorCodesCount), - builder->CreateVector(kSubgraphs, kSubgraphsCount), - builder->CreateString("test_model"), - builder->CreateVector(buffers, kBuffersCount)); - FinishModelBuffer(*builder, kModelOffset); - void* model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -// Mock model with one main subgraph containing a single CALL_ONCE op (with null -// inputs and outputs) which invokes a second subgraph which has null inputs and -// outputs. 
-const Model* BuildSimpleMockModelWithNullInputsOutputs() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - - constexpr size_t buffers_size = 1; - const Offset buffers[buffers_size] = { - CreateBuffer(*builder), - }; - constexpr size_t tensor_shape_size = 1; - const int32_t tensor_shape[tensor_shape_size] = {0}; - constexpr size_t tensors_size = 1; - const Offset tensors[tensors_size] = { - CreateTensor(*builder, - builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, - builder->CreateString("test_input_tensor1"), 0, false), - }; - constexpr size_t subgraph0_inputs_size = 1; - const int32_t subgraph0_inputs[subgraph0_inputs_size] = {0}; - constexpr size_t subgraph0_outputs_size = 1; - const int32_t subgraph0_outputs[subgraph0_outputs_size] = {0}; - constexpr size_t operators_size = 1; - const Offset subgraph0_operators[operators_size] = { - CreateOperator(*builder, 0, {}, {}, BuiltinOptions_CallOnceOptions, - CreateCallOnceOptions(*builder, 1).Union()), - }; - const Offset subgraph1_operators[operators_size] = { - CreateOperator(*builder, 1, {}, {}, BuiltinOptions_NONE)}; - constexpr size_t subgraphs_size = 2; - const Offset subgraphs[subgraphs_size] = { - CreateSubGraph( - *builder, builder->CreateVector(tensors, tensors_size), - builder->CreateVector(subgraph0_inputs, subgraph0_inputs_size), - builder->CreateVector(subgraph0_outputs, subgraph0_outputs_size), - builder->CreateVector(subgraph0_operators, operators_size), - builder->CreateString("main_subgraph")), - CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), {}, - {}, - builder->CreateVector(subgraph1_operators, operators_size), - builder->CreateString("secondary subgraph")), - }; - constexpr size_t operator_codes_size = 2; - const Offset operator_codes[operator_codes_size] = { - CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, - "call_once_op", - /*version=*/0, BuiltinOperator_CALL_ONCE), - 
CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, "no_op", - /*version=*/0, BuiltinOperator_CUSTOM)}; - const Offset model_offset = CreateModel( - *builder, 0, builder->CreateVector(operator_codes, operator_codes_size), - builder->CreateVector(subgraphs, subgraphs_size), - builder->CreateString("test_model"), - builder->CreateVector(buffers, buffers_size)); - FinishModelBuffer(*builder, model_offset); - void* model_pointer = builder->GetBufferPointer(); - const Model* model = flatbuffers::GetRoot(model_pointer); - return model; -} - -} // namespace - -const TfLiteRegistration* SimpleStatefulOp::getRegistration() { - return GetMutableRegistration(); -} - -TfLiteRegistration* SimpleStatefulOp::GetMutableRegistration() { - static TfLiteRegistration r; - r.init = Init; - r.prepare = Prepare; - r.invoke = Invoke; - return &r; -} - -void* SimpleStatefulOp::Init(TfLiteContext* context, const char* buffer, - size_t length) { - TFLITE_DCHECK(context->AllocateBufferForEval == nullptr); - TFLITE_DCHECK(context->GetScratchBuffer == nullptr); - TFLITE_DCHECK(context->RequestScratchBufferInArena == nullptr); - - void* raw = context->AllocatePersistentBuffer(context, sizeof(OpData)); - OpData* data = reinterpret_cast(raw); - *data = {}; - return raw; -} - -TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context, - TfLiteNode* node) { - OpData* data = reinterpret_cast(node->user_data); - - // Make sure that the input is in uint8_t with at least 1 data entry. - MicroContext* micro_context = GetMicroContext(context); - TfLiteTensor* input = - micro_context->AllocateTempInputTensor(node, kInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - - if (input->type != kTfLiteInt8) return kTfLiteError; - if (NumElements(input->dims) == 0) return kTfLiteError; - - // Allocate a temporary buffer with the same size of input for sorting. 
- TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( - context, sizeof(uint8_t) * NumElements(input->dims), - &data->sorting_buffer)); - // We can interleave scratch / persistent buffer allocation. - data->invoke_count = reinterpret_cast( - context->AllocatePersistentBuffer(context, sizeof(int))); - *data->invoke_count = 0; - - micro_context->DeallocateTempTfLiteTensor(input); - return kTfLiteOk; -} - -TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, - TfLiteNode* node) { - OpData* data = reinterpret_cast(node->user_data); - *data->invoke_count += 1; - - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); - TF_LITE_ENSURE(context, input != nullptr); - const uint8_t* input_data = input->data.uint8; - int size = NumElements(input->dims); - - uint8_t* sorting_buffer = reinterpret_cast( - context->GetScratchBuffer(context, data->sorting_buffer)); - // Copy inputs data to the sorting buffer. We don't want to mutate the input - // tensor as it might be used by a another node. - for (int i = 0; i < size; i++) { - sorting_buffer[i] = input_data[i]; - } - - // In place insertion sort on `sorting_buffer`. 
- for (int i = 1; i < size; i++) { - for (int j = i; j > 0 && sorting_buffer[j] < sorting_buffer[j - 1]; j--) { - std::swap(sorting_buffer[j], sorting_buffer[j - 1]); - } - } - - TfLiteEvalTensor* median = - tflite::micro::GetEvalOutput(context, node, kMedianTensor); - TF_LITE_ENSURE(context, median != nullptr); - uint8_t* median_data = median->data.uint8; - TfLiteEvalTensor* invoke_count = - tflite::micro::GetEvalOutput(context, node, kInvokeCount); - TF_LITE_ENSURE(context, invoke_count != nullptr); - int32_t* invoke_count_data = invoke_count->data.i32; - - median_data[0] = sorting_buffer[size / 2]; - invoke_count_data[0] = *data->invoke_count; - return kTfLiteOk; -} - -const TfLiteRegistration* MockCustom::getRegistration() { - return GetMutableRegistration(); -} - -TfLiteRegistration* MockCustom::GetMutableRegistration() { - static TfLiteRegistration r; - r.init = Init; - r.prepare = Prepare; - r.invoke = Invoke; - r.free = Free; - return &r; -} - -void* MockCustom::Init(TfLiteContext* context, const char* buffer, - size_t length) { - // We don't support delegate in TFL micro. This is a weak check to test if - // context struct being zero-initialized. - TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr); - freed_ = false; - // Do nothing. 
- return nullptr; -} - -void MockCustom::Free(TfLiteContext* context, void* buffer) { freed_ = true; } - -TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) { - return kTfLiteOk; -} - -TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); - TF_LITE_ENSURE(context, input != nullptr); - const int32_t* input_data = input->data.i32; - const TfLiteEvalTensor* weight = - tflite::micro::GetEvalInput(context, node, 1); - TF_LITE_ENSURE(context, weight != nullptr); - const uint8_t* weight_data = weight->data.uint8; - TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - TF_LITE_ENSURE(context, output != nullptr); - int32_t* output_data = output->data.i32; - output_data[0] = - 0; // Catch output tensor sharing memory with an input tensor - output_data[0] = input_data[0] + weight_data[0]; - return kTfLiteOk; -} - -bool MockCustom::freed_ = false; - -const TfLiteRegistration* MultipleInputs::getRegistration() { - return GetMutableRegistration(); -} - -TfLiteRegistration* MultipleInputs::GetMutableRegistration() { - static TfLiteRegistration r; - r.init = Init; - r.prepare = Prepare; - r.invoke = Invoke; - r.free = Free; - return &r; -} - -void* MultipleInputs::Init(TfLiteContext* context, const char* buffer, - size_t length) { - // We don't support delegate in TFL micro. This is a weak check to test if - // context struct being zero-initialized. - TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr); - freed_ = false; - // Do nothing. 
- return nullptr; -} - -void MultipleInputs::Free(TfLiteContext* context, void* buffer) { - freed_ = true; -} - -TfLiteStatus MultipleInputs::Prepare(TfLiteContext* context, TfLiteNode* node) { - return kTfLiteOk; -} - -TfLiteStatus MultipleInputs::Invoke(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); - TF_LITE_ENSURE(context, input != nullptr); - const int32_t* input_data = input->data.i32; - const TfLiteEvalTensor* input1 = - tflite::micro::GetEvalInput(context, node, 1); - TF_LITE_ENSURE(context, input1 != nullptr); - const int32_t* input_data1 = input1->data.i32; - const TfLiteEvalTensor* input2 = - tflite::micro::GetEvalInput(context, node, 2); - TF_LITE_ENSURE(context, input2 != nullptr); - const int32_t* input_data2 = input2->data.i32; - - TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - TF_LITE_ENSURE(context, output != nullptr); - int32_t* output_data = output->data.i32; - output_data[0] = - 0; // Catch output tensor sharing memory with an input tensor - output_data[0] = input_data[0] + input_data1[0] + input_data2[0]; - return kTfLiteOk; -} - -bool MultipleInputs::freed_ = false; - -const TfLiteRegistration* NoOp::getRegistration() { - return GetMutableRegistration(); -} - -TfLiteRegistration* NoOp::GetMutableRegistration() { - static TfLiteRegistration r; - r.init = Init; - r.prepare = Prepare; - r.invoke = Invoke; - r.free = Free; - return &r; -} - -void* NoOp::Init(TfLiteContext* context, const char* buffer, size_t length) { - // We don't support delegate in TFL micro. This is a weak check to test if - // context struct being zero-initialized. - TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr); - freed_ = false; - // Do nothing. 
- return nullptr; -} - -void NoOp::Free(TfLiteContext* context, void* buffer) { freed_ = true; } - -TfLiteStatus NoOp::Prepare(TfLiteContext* context, TfLiteNode* node) { - return kTfLiteOk; -} - -TfLiteStatus NoOp::Invoke(TfLiteContext* context, TfLiteNode* node) { - return kTfLiteOk; -} - -bool NoOp::freed_ = false; - -AllOpsResolver GetOpResolver() { - AllOpsResolver op_resolver; - op_resolver.AddCustom("mock_custom", MockCustom::GetMutableRegistration()); - op_resolver.AddCustom("simple_stateful_op", - SimpleStatefulOp::GetMutableRegistration()); - op_resolver.AddCustom("multiple_inputs_op", - MultipleInputs::GetMutableRegistration()); - op_resolver.AddCustom("no_op", NoOp::GetMutableRegistration()); - op_resolver.AddCustom("custom_packer_op", PackerOp::GetMutableRegistration()); - return op_resolver; -} - -const Model* GetModelWithUnusedInputs() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildModelWithUnusedInputs()); - } - return model; -} - -const Model* GetModelWithUnusedOperatorOutputs() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildModelWithUnusedOperatorOutputs()); - } - return model; -} - -const Model* GetModelWith256x256Tensor() { - static const Model* model = BuildModelWith256x256Tensor(); - return model; -} - -const Model* GetSimpleMockModel() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildSimpleMockModel()); - } - return model; -} - -const Model* GetSimpleMultipleInputsModel() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildSimpleMultipleInputsModel()); - } - return model; -} - -const Model* GetSimpleModelWithSubgraphsAndIf() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildSimpleModelWithSubgraphsAndIf()); - } - return model; -} - -const Model* GetSimpleModelWithSubgraphsAndWhile() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildSimpleModelWithSubgraphsAndWhile()); - } - 
return model; -} - -const Model* GetModelWithIfAndSubgraphInputTensorOverlap() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildModelWithIfAndSubgraphInputTensorOverlap()); - } - return model; -} - -const Model* GetSimpleModelWithNullInputsAndOutputs() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildSimpleMockModelWithNullInputsOutputs()); - } - return model; -} - -const Model* GetComplexMockModel() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildComplexMockModel()); - } - return model; -} - -const Model* GetSimpleModelWithBranch() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildSimpleModelWithBranch()); - } - return model; -} - -const Model* GetModelWithOfflinePlanning(int num_tensors, - const int32_t* metadata_buffer, - NodeConnection* node_conn, - int num_conns, - int num_subgraph_inputs) { - const Model* model = BuildModelWithOfflinePlanning( - num_tensors, metadata_buffer, node_conn, num_conns, num_subgraph_inputs); - return model; -} - -const Model* GetSimpleStatefulModel() { - static Model* model = nullptr; - if (!model) { - model = const_cast(BuildSimpleStatefulModel()); - } - return model; -} - -const Tensor* Create1dFlatbufferTensor(int size, bool is_variable) { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - constexpr size_t tensor_shape_size = 1; - const int32_t tensor_shape[tensor_shape_size] = {size}; - const Offset tensor_offset = CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, builder->CreateString("test_tensor"), 0, - is_variable); - builder->Finish(tensor_offset); - void* tensor_pointer = builder->GetBufferPointer(); - const Tensor* tensor = flatbuffers::GetRoot(tensor_pointer); - return tensor; -} - -const Tensor* CreateQuantizedFlatbufferTensor(int size) { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = 
BuilderInstance(); - constexpr size_t quant_params_size = 1; - const float min_array[quant_params_size] = {0.1f}; - const float max_array[quant_params_size] = {0.2f}; - const float scale_array[quant_params_size] = {0.3f}; - const int64_t zero_point_array[quant_params_size] = {100ll}; - - const Offset quant_params = - CreateQuantizationParameters( - *builder, - /*min=*/builder->CreateVector(min_array, quant_params_size), - /*max=*/builder->CreateVector(max_array, quant_params_size), - /*scale=*/ - builder->CreateVector(scale_array, quant_params_size), - /*zero_point=*/ - builder->CreateVector(zero_point_array, quant_params_size)); - - constexpr size_t tensor_shape_size = 1; - const int32_t tensor_shape[tensor_shape_size] = {size}; - const Offset tensor_offset = CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, builder->CreateString("test_tensor"), quant_params, - false); - builder->Finish(tensor_offset); - void* tensor_pointer = builder->GetBufferPointer(); - const Tensor* tensor = flatbuffers::GetRoot(tensor_pointer); - return tensor; -} - -const Tensor* CreateMissingQuantizationFlatbufferTensor(int size) { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = BuilderInstance(); - const Offset quant_params = - CreateQuantizationParameters(*builder, 0, 0, 0, 0, - QuantizationDetails_NONE, 0, 0); - constexpr size_t tensor_shape_size = 1; - const int32_t tensor_shape[tensor_shape_size] = {size}; - const Offset tensor_offset = CreateTensor( - *builder, builder->CreateVector(tensor_shape, tensor_shape_size), - TensorType_INT32, 0, builder->CreateString("test_tensor"), quant_params, - false); - builder->Finish(tensor_offset); - void* tensor_pointer = builder->GetBufferPointer(); - const Tensor* tensor = flatbuffers::GetRoot(tensor_pointer); - return tensor; -} - -const flatbuffers::Vector>* -CreateFlatbufferBuffers() { - using flatbuffers::Offset; - flatbuffers::FlatBufferBuilder* builder = 
BuilderInstance(); - constexpr size_t buffers_size = 1; - const Offset buffers[buffers_size] = { - CreateBuffer(*builder), - }; - const flatbuffers::Offset>> - buffers_offset = builder->CreateVector(buffers, buffers_size); - builder->Finish(buffers_offset); - void* buffers_pointer = builder->GetBufferPointer(); - const flatbuffers::Vector>* result = - flatbuffers::GetRoot>>( - buffers_pointer); - return result; -} - -int TestStrcmp(const char* a, const char* b) { - if ((a == nullptr) || (b == nullptr)) { - return -1; - } - while ((*a != 0) && (*a == *b)) { - a++; - b++; - } - return *reinterpret_cast(a) - - *reinterpret_cast(b); -} - -// Wrapper to forward kernel errors to the interpreter's error reporter. -void ReportOpError(struct TfLiteContext* context, const char* format, ...) { -#ifndef TF_LITE_STRIP_ERROR_STRINGS - ErrorReporter* error_reporter = static_cast(context->impl_); - va_list args; - va_start(args, format); - TF_LITE_REPORT_ERROR(error_reporter, format, args); - va_end(args); -#endif -} - -// Create a TfLiteIntArray from an array of ints. The first element in the -// supplied array must be the size of the array expressed as an int. -TfLiteIntArray* IntArrayFromInts(int* int_array) { - return reinterpret_cast(int_array); -} - -// Create a TfLiteFloatArray from an array of floats. The first element in the -// supplied array must be the size of the array expressed as a float. 
-TfLiteFloatArray* FloatArrayFromFloats(const float* floats) { - static_assert(sizeof(float) == sizeof(int), - "assumes sizeof(float) == sizeof(int) to perform casting"); - int size = static_cast(floats[0]); - *reinterpret_cast(const_cast(floats)) = size; - return reinterpret_cast(const_cast(floats)); -} - -TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, bool is_variable) { - float bias_scale = input_scale * weights_scale; - tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); - - // Quantized int16_t tensors always have a zero point of 0, since the range of - // int16_t values is large, and because zero point costs extra cycles during - // processing. - TfLiteTensor result = - CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); - return result; -} - -TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, bool is_variable) { - float bias_scale = input_scale * weights_scale; - tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); - - // Quantized int32_t tensors always have a zero point of 0, since the range of - // int32_t values is large, and because zero point costs extra cycles during - // processing. - TfLiteTensor result = - CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); - return result; -} - -TfLiteTensor CreateQuantizedBiasTensor(const float* data, - std::int64_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, bool is_variable) { - float bias_scale = input_scale * weights_scale; - tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale); - - // Quantized int32_t tensors always have a zero point of 0, since the range of - // int32_t values is large, and because zero point costs extra cycles during - // processing. 
- TfLiteTensor result = - CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable); - return result; -} - -// Quantizes int32_t bias tensor with per-channel weights determined by input -// scale multiplied by weight scale for each channel. -template -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, T* quantized, TfLiteIntArray* dims, float input_scale, - float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable) { - int input_size = ElementCount(*dims); - int num_channels = dims->data[quantized_dimension]; - // First element is reserved for array length - zero_points[0] = num_channels; - scales[0] = static_cast(num_channels); - float* scales_array = &scales[1]; - for (int i = 0; i < num_channels; i++) { - scales_array[i] = input_scale * weight_scales[i]; - zero_points[i + 1] = 0; - } - - SymmetricPerChannelQuantize(input, quantized, input_size, num_channels, - scales_array); - - affine_quant->scale = FloatArrayFromFloats(scales); - affine_quant->zero_point = IntArrayFromInts(zero_points); - affine_quant->quantized_dimension = quantized_dimension; - - TfLiteTensor result = CreateTensor(quantized, dims, is_variable); - result.quantization = {kTfLiteAffineQuantization, affine_quant}; - return result; -} - -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, int32_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable) { - return CreatePerChannelQuantizedBiasTensor( - input, quantized, dims, input_scale, weight_scales, scales, zero_points, - affine_quant, quantized_dimension, is_variable); -} - -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, std::int64_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, - 
TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable) { - return CreatePerChannelQuantizedBiasTensor( - input, quantized, dims, input_scale, weight_scales, scales, zero_points, - affine_quant, quantized_dimension, is_variable); -} - -TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( - const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales, - int* zero_points, TfLiteAffineQuantization* affine_quant, - int quantized_dimension, bool is_variable) { - int channel_count = dims->data[quantized_dimension]; - scales[0] = static_cast(channel_count); - zero_points[0] = channel_count; - - SignedSymmetricPerChannelQuantize(input, dims, quantized_dimension, quantized, - &scales[1]); - - for (int i = 0; i < channel_count; i++) { - zero_points[i + 1] = 0; - } - - affine_quant->scale = FloatArrayFromFloats(scales); - affine_quant->zero_point = IntArrayFromInts(zero_points); - affine_quant->quantized_dimension = quantized_dimension; - - TfLiteTensor result = CreateTensor(quantized, dims, is_variable); - result.quantization = {kTfLiteAffineQuantization, affine_quant}; - return result; -} - -size_t GetModelTensorCount(const Model* model) { - auto* subgraphs = model->subgraphs(); - if (subgraphs) { - return (*subgraphs)[0]->tensors()->size(); - } - return 0; -} - -} // namespace testing -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/micro/test_helpers.h b/code/components/tflite-lib/tensorflow/lite/micro/test_helpers.h deleted file mode 100644 index 5441ce3e..00000000 --- a/code/components/tflite-lib/tensorflow/lite/micro/test_helpers.h +++ /dev/null @@ -1,298 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_ -#define TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_ - -#include -#include - -#include "flatbuffers/flatbuffers.h" // from @flatbuffers -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" -#include "tensorflow/lite/micro/micro_utils.h" -#include "tensorflow/lite/portable_type_to_tflitetype.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { -namespace testing { - -constexpr int kOfflinePlannerHeaderSize = 3; - -struct NodeConnection_ { - std::initializer_list input; - std::initializer_list output; -}; -typedef struct NodeConnection_ NodeConnection; - -// A simple operator that returns the median of the input with the number of -// times the kernel was invoked. The implementation below is deliberately -// complicated, just to demonstrate how kernel memory planning works. 
-class SimpleStatefulOp { - static constexpr int kBufferNotAllocated = 0; - // Inputs: - static constexpr int kInputTensor = 0; - // Outputs: - static constexpr int kMedianTensor = 0; - static constexpr int kInvokeCount = 1; - struct OpData { - int* invoke_count = nullptr; - int sorting_buffer = kBufferNotAllocated; - }; - - public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); - static void* Init(TfLiteContext* context, const char* buffer, size_t length); - static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); - static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); -}; - -class MockCustom { - public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); - static void* Init(TfLiteContext* context, const char* buffer, size_t length); - static void Free(TfLiteContext* context, void* buffer); - static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); - static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); - - static bool freed_; -}; - -// A simple operator with the purpose of testing multiple inputs. It returns -// the sum of the inputs. -class MultipleInputs { - public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); - static void* Init(TfLiteContext* context, const char* buffer, size_t length); - static void Free(TfLiteContext* context, void* buffer); - static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); - static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); - - static bool freed_; -}; - -// A simple no-op operator. 
-class NoOp { - public: - static const TfLiteRegistration* getRegistration(); - static TfLiteRegistration* GetMutableRegistration(); - static void* Init(TfLiteContext* context, const char* buffer, size_t length); - static void Free(TfLiteContext* context, void* buffer); - static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); - static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); - - static bool freed_; -}; - -// Returns an Op Resolver that can be used in the testing code. -AllOpsResolver GetOpResolver(); - -// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input, -// 1 layer of weights, 1 output Tensor, and 1 operator. -const Model* GetSimpleMockModel(); - -// Returns a flatbuffer TensorFlow Lite model with more inputs, variable -// tensors, and operators. -const Model* GetComplexMockModel(); - -// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input, -// 1 layer of weights, 1 output Tensor, and 1 operator. -// The size of all three tensors is 256 x 256, which is larger than what other -// models provide from this test helper. -const Model* GetModelWith256x256Tensor(); - -// Returns a simple flatbuffer model with two branches. -const Model* GetSimpleModelWithBranch(); - -// Returns a simple example flatbuffer TensorFlow Lite model. Contains 3 inputs, -// 1 output Tensor, and 1 operator. -const Model* GetSimpleMultipleInputsModel(); - -// Returns a simple flatbuffer model with offline planned tensors -// @param[in] num_tensors Number of tensors in the model. -// @param[in] metadata_buffer Metadata for offline planner. -// @param[in] node_con List of connections, i.e. operators -// in the model. -// @param[in] num_conns Number of connections. -// @param[in] num_subgraph_inputs How many of the input tensors are in -// the subgraph inputs. The default value -// of 0 means all of the input tensors -// are in the subgraph input list. 
There -// must be at least 1 input tensor in the -// subgraph input list. -const Model* GetModelWithOfflinePlanning(int num_tensors, - const int32_t* metadata_buffer, - NodeConnection* node_conn, - int num_conns, - int num_subgraph_inputs = 0); - -// Returns a flatbuffer with a single operator, two inputs (one unused) and one -// output. -const Model* GetModelWithUnusedInputs(); - -// Returns a flatbuffer with a single operator, zero inputs and two outputs -// (one unused). -const Model* GetModelWithUnusedOperatorOutputs(); - -// Returns a flatbuffer model with `simple_stateful_op` -const Model* GetSimpleStatefulModel(); - -// Returns a flatbuffer model with "if" and two subgraphs. -const Model* GetSimpleModelWithSubgraphsAndIf(); - -// Returns a flatbuffer model with "while" and three subgraphs. -const Model* GetSimpleModelWithSubgraphsAndWhile(); - -// Returns a flatbuffer model with "if" and two subgraphs and the input tensor 1 -// of "if" subgraph overlaps with the input tensor 2 of subgraph 1. -const Model* GetModelWithIfAndSubgraphInputTensorOverlap(); - -// Returns a flatbuffer model with null subgraph/operator inputs and outputs. -const Model* GetSimpleModelWithNullInputsAndOutputs(); - -// Builds a one-dimensional flatbuffer tensor of the given size. -const Tensor* Create1dFlatbufferTensor(int size, bool is_variable = false); - -// Builds a one-dimensional flatbuffer tensor of the given size with -// quantization metadata. -const Tensor* CreateQuantizedFlatbufferTensor(int size); - -// Creates a one-dimensional tensor with no quantization metadata. -const Tensor* CreateMissingQuantizationFlatbufferTensor(int size); - -// Creates a vector of flatbuffer buffers. -const flatbuffers::Vector>* -CreateFlatbufferBuffers(); - -// Performs a simple string comparison without requiring standard C library. -int TestStrcmp(const char* a, const char* b); - -// Wrapper to forward kernel errors to the interpreter's error reporter. 
-void ReportOpError(struct TfLiteContext* context, const char* format, ...); - -void PopulateContext(TfLiteTensor* tensors, int tensors_size, - TfLiteContext* context); - -// Create a TfLiteIntArray from an array of ints. The first element in the -// supplied array must be the size of the array expressed as an int. -TfLiteIntArray* IntArrayFromInts(int* int_array); - -// Create a TfLiteFloatArray from an array of floats. The first element in the -// supplied array must be the size of the array expressed as a float. -TfLiteFloatArray* FloatArrayFromFloats(const float* floats); - -template -TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims, - const bool is_variable = false) { - TfLiteTensor result; - result.dims = dims; - result.params = {}; - result.quantization = {kTfLiteNoQuantization, nullptr}; - result.is_variable = is_variable; - result.allocation_type = kTfLiteMemNone; - result.type = typeToTfLiteType(); - // Const cast is used to allow passing in const and non-const arrays within a - // single CreateTensor method. A Const array should be used for immutable - // input tensors and non-const array should be used for mutable and output - // tensors. 
- result.data.data = const_cast(data); - result.quantization = {kTfLiteAffineQuantization, nullptr}; - result.bytes = ElementCount(*dims) * sizeof(T); - return result; -} - -template -TfLiteTensor CreateQuantizedTensor(const T* data, TfLiteIntArray* dims, - const float scale, const int zero_point = 0, - const bool is_variable = false) { - TfLiteTensor result = CreateTensor(data, dims, is_variable); - result.params = {scale, zero_point}; - result.quantization = {kTfLiteAffineQuantization, nullptr}; - return result; -} - -template -TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized, - TfLiteIntArray* dims, float scale, - int zero_point, bool is_variable = false) { - int input_size = ElementCount(*dims); - tflite::Quantize(input, quantized, input_size, scale, zero_point); - return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable); -} - -TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, - bool is_variable = false); - -TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, - bool is_variable = false); - -TfLiteTensor CreateQuantizedBiasTensor(const float* data, - std::int64_t* quantized, - TfLiteIntArray* dims, float input_scale, - float weights_scale, - bool is_variable = false); - -// Quantizes int32_t bias tensor with per-channel weights determined by input -// scale multiplied by weight scale for each channel. -TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, int32_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable = false); - -// Quantizes int64_t bias tensor with per-channel weights determined by input -// scale multiplied by weight scale for each channel. 
-TfLiteTensor CreatePerChannelQuantizedBiasTensor( - const float* input, std::int64_t* quantized, TfLiteIntArray* dims, - float input_scale, float* weight_scales, float* scales, int* zero_points, - TfLiteAffineQuantization* affine_quant, int quantized_dimension, - bool is_variable = false); - -TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( - const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales, - int* zero_points, TfLiteAffineQuantization* affine_quant, - int quantized_dimension, bool is_variable = false); - -// Returns the number of tensors in the default subgraph for a tflite::Model. -size_t GetModelTensorCount(const Model* model); - -// Derives the quantization scaling factor from a min and max range. -template -inline float ScaleFromMinMax(const float min, const float max) { - return (max - min) / - static_cast((std::numeric_limits::max() * 1.0) - - std::numeric_limits::min()); -} - -// Derives the quantization zero point from a min and max range. -template -inline int ZeroPointFromMinMax(const float min, const float max) { - return static_cast(std::numeric_limits::min()) + - static_cast(-min / ScaleFromMinMax(min, max) + 0.5f); -} - -} // namespace testing -} // namespace tflite - -#endif // TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/portable_type_to_tflitetype.h b/code/components/tflite-lib/tensorflow/lite/portable_type_to_tflitetype.h deleted file mode 100644 index 52d7fdef..00000000 --- a/code/components/tflite-lib/tensorflow/lite/portable_type_to_tflitetype.h +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_PORTABLE_TYPE_TO_TFLITETYPE_H_ -#define TENSORFLOW_LITE_PORTABLE_TYPE_TO_TFLITETYPE_H_ - -// Most of the definitions have been moved to this subheader so that Micro -// can include it without relying on and , which isn't -// available on all platforms. - -// Arduino build defines abs as a macro here. That is invalid C++, and breaks -// libc++'s header, undefine it. -#ifdef abs -#undef abs -#endif - -#include - -#include "tensorflow/lite/c/common.h" - -namespace tflite { - -// Map statically from a C++ type to a TfLiteType. Used in interpreter for -// safe casts. -// Example: -// typeToTfLiteType() -> kTfLiteBool -template -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteNoType; -} -// Map from TfLiteType to the corresponding C++ type. -// Example: -// TfLiteTypeToType::Type -> bool -template -struct TfLiteTypeToType {}; // Specializations below - -// Template specialization for both typeToTfLiteType and TfLiteTypeToType. -#define MATCH_TYPE_AND_TFLITE_TYPE(CPP_TYPE, TFLITE_TYPE_ENUM) \ - template <> \ - constexpr TfLiteType typeToTfLiteType() { \ - return TFLITE_TYPE_ENUM; \ - } \ - template <> \ - struct TfLiteTypeToType { \ - using Type = CPP_TYPE; \ - } - -// No string mapping is included here, since the TF Lite packed representation -// doesn't correspond to a C++ type well. 
-MATCH_TYPE_AND_TFLITE_TYPE(int32_t, kTfLiteInt32); -MATCH_TYPE_AND_TFLITE_TYPE(uint32_t, kTfLiteUInt32); -MATCH_TYPE_AND_TFLITE_TYPE(int16_t, kTfLiteInt16); -MATCH_TYPE_AND_TFLITE_TYPE(uint16_t, kTfLiteUInt16); -MATCH_TYPE_AND_TFLITE_TYPE(int64_t, kTfLiteInt64); -MATCH_TYPE_AND_TFLITE_TYPE(float, kTfLiteFloat32); -MATCH_TYPE_AND_TFLITE_TYPE(unsigned char, kTfLiteUInt8); -MATCH_TYPE_AND_TFLITE_TYPE(int8_t, kTfLiteInt8); -MATCH_TYPE_AND_TFLITE_TYPE(bool, kTfLiteBool); -MATCH_TYPE_AND_TFLITE_TYPE(TfLiteFloat16, kTfLiteFloat16); -MATCH_TYPE_AND_TFLITE_TYPE(double, kTfLiteFloat64); -MATCH_TYPE_AND_TFLITE_TYPE(uint64_t, kTfLiteUInt64); - -} // namespace tflite -#endif // TENSORFLOW_LITE_PORTABLE_TYPE_TO_TFLITETYPE_H_ diff --git a/code/components/tflite-lib/tensorflow/lite/schema/schema_utils.cc b/code/components/tflite-lib/tensorflow/lite/schema/schema_utils.cc deleted file mode 100644 index fc19290b..00000000 --- a/code/components/tflite-lib/tensorflow/lite/schema/schema_utils.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/schema/schema_utils.h" - -#include - -#include "tensorflow/lite/kernels/internal/compatibility.h" - -namespace tflite { - -// The following GetBuiltinCode methods are the utility methods for reading -// builtin operatore code, ensuring compatibility issues between v3 and v3a -// schema. Always the maximum value of the two fields always will be the correct -// value as follows: -// -// - Supporting schema version v3 models -// -// The `builtin_code` field is not available in the v3 models. Flatbuffer -// library will feed zero value, which is the default value in the v3a schema. -// The actual builtin operatore code value will exist in the -// `deprecated_builtin_code` field. At the same time, it implies that -// `deprecated_builtin_code` >= `builtin_code` and the maximum value of the two -// fields will be same with `deprecated_builtin_code'. -// -// - Supporting builtin operator codes beyonds 127 -// -// New builtin operators, whose operator code is larger than 127, can not be -// assigned to the `deprecated_builtin_code` field. In such cases, the -// value of the `builtin_code` field should be used for the builtin operator -// code. In the case, the maximum value of the two fields will be the value of -// the `builtin_code` as the right value. - -BuiltinOperator GetBuiltinCode(const OperatorCode* op_code) { - // Caller should guarantee that the given argument value is not a nullptr. - TFLITE_DCHECK(op_code != nullptr); - - return std::max( - op_code->builtin_code(), - static_cast(op_code->deprecated_builtin_code())); -} - -BuiltinOperator GetBuiltinCode(const OperatorCodeT* op_code) { - // Caller should guarantee that the given argument value is not a nullptr. 
- TFLITE_DCHECK(op_code != nullptr); - - return std::max(op_code->builtin_code, static_cast( - op_code->deprecated_builtin_code)); -} - -} // namespace tflite diff --git a/code/components/tflite-lib/tensorflow/lite/schema/schema_utils.h b/code/components/tflite-lib/tensorflow/lite/schema/schema_utils.h deleted file mode 100644 index 9cca36c7..00000000 --- a/code/components/tflite-lib/tensorflow/lite/schema/schema_utils.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_SCHEMA_SCHEMA_UTILS_H_ -#define TENSORFLOW_LITE_SCHEMA_SCHEMA_UTILS_H_ - -#include "flatbuffers/flatbuffers.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { - -// The following methods are introduced to resolve op builtin code shortage -// problem. The new builtin operator will be assigned to the extended builtin -// code field in the flatbuffer schema. Those methods helps to hide builtin code -// details. 
-BuiltinOperator GetBuiltinCode(const OperatorCode *op_code); - -BuiltinOperator GetBuiltinCode(const OperatorCodeT *op_code); - -} // namespace tflite - -#endif // TENSORFLOW_LITE_SCHEMA_SCHEMA_UTILS_H_ diff --git a/code/components/tflite-lib/third_party/flatbuffers/LICENSE.txt b/code/components/tflite-lib/third_party/flatbuffers/LICENSE.txt deleted file mode 100644 index d6456956..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/allocator.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/allocator.h deleted file mode 100644 index f4ef22db..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/allocator.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FLATBUFFERS_ALLOCATOR_H_ -#define FLATBUFFERS_ALLOCATOR_H_ - -#include "flatbuffers/base.h" - -namespace flatbuffers { - -// Allocator interface. This is flatbuffers-specific and meant only for -// `vector_downward` usage. -class Allocator { - public: - virtual ~Allocator() {} - - // Allocate `size` bytes of memory. 
- virtual uint8_t *allocate(size_t size) = 0; - - // Deallocate `size` bytes of memory at `p` allocated by this allocator. - virtual void deallocate(uint8_t *p, size_t size) = 0; - - // Reallocate `new_size` bytes of memory, replacing the old region of size - // `old_size` at `p`. In contrast to a normal realloc, this grows downwards, - // and is intended specifcally for `vector_downward` use. - // `in_use_back` and `in_use_front` indicate how much of `old_size` is - // actually in use at each end, and needs to be copied. - virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size, - size_t new_size, size_t in_use_back, - size_t in_use_front) { - FLATBUFFERS_ASSERT(new_size > old_size); // vector_downward only grows - uint8_t *new_p = allocate(new_size); - memcpy_downward(old_p, old_size, new_p, new_size, in_use_back, - in_use_front); - deallocate(old_p, old_size); - return new_p; - } - - protected: - // Called by `reallocate_downward` to copy memory from `old_p` of `old_size` - // to `new_p` of `new_size`. Only memory of size `in_use_front` and - // `in_use_back` will be copied from the front and back of the old memory - // allocation. - void memcpy_downward(uint8_t *old_p, size_t old_size, uint8_t *new_p, - size_t new_size, size_t in_use_back, - size_t in_use_front) { - memcpy(new_p + new_size - in_use_back, old_p + old_size - in_use_back, - in_use_back); - memcpy(new_p, old_p, in_use_front); - } -}; - -} // namespace flatbuffers - -#endif // FLATBUFFERS_ALLOCATOR_H_ \ No newline at end of file diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/array.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/array.h deleted file mode 100644 index d4b73fc9..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/array.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FLATBUFFERS_ARRAY_H_ -#define FLATBUFFERS_ARRAY_H_ - -#include "flatbuffers/base.h" -#include "flatbuffers/stl_emulation.h" -#include "flatbuffers/vector.h" - -namespace flatbuffers { - -// This is used as a helper type for accessing arrays. -template class Array { - // Array can carry only POD data types (scalars or structs). - typedef typename flatbuffers::bool_constant::value> - scalar_tag; - typedef - typename flatbuffers::conditional::type - IndirectHelperType; - - public: - typedef uint16_t size_type; - typedef typename IndirectHelper::return_type return_type; - typedef VectorIterator const_iterator; - typedef VectorReverseIterator const_reverse_iterator; - - // If T is a LE-scalar or a struct (!scalar_tag::value). - static FLATBUFFERS_CONSTEXPR bool is_span_observable = - (scalar_tag::value && (FLATBUFFERS_LITTLEENDIAN || sizeof(T) == 1)) || - !scalar_tag::value; - - FLATBUFFERS_CONSTEXPR uint16_t size() const { return length; } - - return_type Get(uoffset_t i) const { - FLATBUFFERS_ASSERT(i < size()); - return IndirectHelper::Read(Data(), i); - } - - return_type operator[](uoffset_t i) const { return Get(i); } - - // If this is a Vector of enums, T will be its storage type, not the enum - // type. This function makes it convenient to retrieve value with enum - // type E. 
- template E GetEnum(uoffset_t i) const { - return static_cast(Get(i)); - } - - const_iterator begin() const { return const_iterator(Data(), 0); } - const_iterator end() const { return const_iterator(Data(), size()); } - - const_reverse_iterator rbegin() const { - return const_reverse_iterator(end()); - } - const_reverse_iterator rend() const { - return const_reverse_iterator(begin()); - } - - const_iterator cbegin() const { return begin(); } - const_iterator cend() const { return end(); } - - const_reverse_iterator crbegin() const { return rbegin(); } - const_reverse_iterator crend() const { return rend(); } - - // Get a mutable pointer to elements inside this array. - // This method used to mutate arrays of structs followed by a @p Mutate - // operation. For primitive types use @p Mutate directly. - // @warning Assignments and reads to/from the dereferenced pointer are not - // automatically converted to the correct endianness. - typename flatbuffers::conditional::type - GetMutablePointer(uoffset_t i) const { - FLATBUFFERS_ASSERT(i < size()); - return const_cast(&data()[i]); - } - - // Change elements if you have a non-const pointer to this object. - void Mutate(uoffset_t i, const T &val) { MutateImpl(scalar_tag(), i, val); } - - // The raw data in little endian format. Use with care. - const uint8_t *Data() const { return data_; } - - uint8_t *Data() { return data_; } - - // Similarly, but typed, much like std::vector::data - const T *data() const { return reinterpret_cast(Data()); } - T *data() { return reinterpret_cast(Data()); } - - // Copy data from a span with endian conversion. - // If this Array and the span overlap, the behavior is undefined. 
- void CopyFromSpan(flatbuffers::span src) { - const auto p1 = reinterpret_cast(src.data()); - const auto p2 = Data(); - FLATBUFFERS_ASSERT(!(p1 >= p2 && p1 < (p2 + length)) && - !(p2 >= p1 && p2 < (p1 + length))); - (void)p1; - (void)p2; - CopyFromSpanImpl(flatbuffers::bool_constant(), src); - } - - protected: - void MutateImpl(flatbuffers::true_type, uoffset_t i, const T &val) { - FLATBUFFERS_ASSERT(i < size()); - WriteScalar(data() + i, val); - } - - void MutateImpl(flatbuffers::false_type, uoffset_t i, const T &val) { - *(GetMutablePointer(i)) = val; - } - - void CopyFromSpanImpl(flatbuffers::true_type, - flatbuffers::span src) { - // Use std::memcpy() instead of std::copy() to avoid performance degradation - // due to aliasing if T is char or unsigned char. - // The size is known at compile time, so memcpy would be inlined. - std::memcpy(data(), src.data(), length * sizeof(T)); - } - - // Copy data from flatbuffers::span with endian conversion. - void CopyFromSpanImpl(flatbuffers::false_type, - flatbuffers::span src) { - for (size_type k = 0; k < length; k++) { Mutate(k, src[k]); } - } - - // This class is only used to access pre-existing data. Don't ever - // try to construct these manually. - // 'constexpr' allows us to use 'size()' at compile time. - // @note Must not use 'FLATBUFFERS_CONSTEXPR' here, as const is not allowed on - // a constructor. -#if defined(__cpp_constexpr) - constexpr Array(); -#else - Array(); -#endif - - uint8_t data_[length * sizeof(T)]; - - private: - // This class is a pointer. Copying will therefore create an invalid object. - // Private and unimplemented copy constructor. - Array(const Array &); - Array &operator=(const Array &); -}; - -// Specialization for Array[struct] with access using Offset pointer. -// This specialization used by idl_gen_text.cpp. 
-template class Array, length> { - static_assert(flatbuffers::is_same::value, "unexpected type T"); - - public: - typedef const void *return_type; - - const uint8_t *Data() const { return data_; } - - // Make idl_gen_text.cpp::PrintContainer happy. - return_type operator[](uoffset_t) const { - FLATBUFFERS_ASSERT(false); - return nullptr; - } - - private: - // This class is only used to access pre-existing data. - Array(); - Array(const Array &); - Array &operator=(const Array &); - - uint8_t data_[1]; -}; - -template -FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span(Array &arr) - FLATBUFFERS_NOEXCEPT { - static_assert( - Array::is_span_observable, - "wrong type U, only plain struct, LE-scalar, or byte types are allowed"); - return span(arr.data(), N); -} - -template -FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span( - const Array &arr) FLATBUFFERS_NOEXCEPT { - static_assert( - Array::is_span_observable, - "wrong type U, only plain struct, LE-scalar, or byte types are allowed"); - return span(arr.data(), N); -} - -template -FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span -make_bytes_span(Array &arr) FLATBUFFERS_NOEXCEPT { - static_assert(Array::is_span_observable, - "internal error, Array might hold only scalars or structs"); - return span(arr.Data(), sizeof(U) * N); -} - -template -FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span -make_bytes_span(const Array &arr) FLATBUFFERS_NOEXCEPT { - static_assert(Array::is_span_observable, - "internal error, Array might hold only scalars or structs"); - return span(arr.Data(), sizeof(U) * N); -} - -// Cast a raw T[length] to a raw flatbuffers::Array -// without endian conversion. Use with care. -// TODO: move these Cast-methods to `internal` namespace. 
-template -Array &CastToArray(T (&arr)[length]) { - return *reinterpret_cast *>(arr); -} - -template -const Array &CastToArray(const T (&arr)[length]) { - return *reinterpret_cast *>(arr); -} - -template -Array &CastToArrayOfEnum(T (&arr)[length]) { - static_assert(sizeof(E) == sizeof(T), "invalid enum type E"); - return *reinterpret_cast *>(arr); -} - -template -const Array &CastToArrayOfEnum(const T (&arr)[length]) { - static_assert(sizeof(E) == sizeof(T), "invalid enum type E"); - return *reinterpret_cast *>(arr); -} - -} // namespace flatbuffers - -#endif // FLATBUFFERS_ARRAY_H_ diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/buffer.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/buffer.h deleted file mode 100644 index e8d2ce9c..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/buffer.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FLATBUFFERS_BUFFER_H_ -#define FLATBUFFERS_BUFFER_H_ - -#include "flatbuffers/base.h" - -namespace flatbuffers { - -// Wrapper for uoffset_t to allow safe template specialization. -// Value is allowed to be 0 to indicate a null object (see e.g. AddOffset). 
-template struct Offset { - uoffset_t o; - Offset() : o(0) {} - Offset(uoffset_t _o) : o(_o) {} - Offset Union() const { return Offset(o); } - bool IsNull() const { return !o; } -}; - -inline void EndianCheck() { - int endiantest = 1; - // If this fails, see FLATBUFFERS_LITTLEENDIAN above. - FLATBUFFERS_ASSERT(*reinterpret_cast(&endiantest) == - FLATBUFFERS_LITTLEENDIAN); - (void)endiantest; -} - -template FLATBUFFERS_CONSTEXPR size_t AlignOf() { - // clang-format off - #ifdef _MSC_VER - return __alignof(T); - #else - #ifndef alignof - return __alignof__(T); - #else - return alignof(T); - #endif - #endif - // clang-format on -} - -// Lexicographically compare two strings (possibly containing nulls), and -// return true if the first is less than the second. -static inline bool StringLessThan(const char *a_data, uoffset_t a_size, - const char *b_data, uoffset_t b_size) { - const auto cmp = memcmp(a_data, b_data, (std::min)(a_size, b_size)); - return cmp == 0 ? a_size < b_size : cmp < 0; -} - -// When we read serialized data from memory, in the case of most scalars, -// we want to just read T, but in the case of Offset, we want to actually -// perform the indirection and return a pointer. -// The template specialization below does just that. -// It is wrapped in a struct since function templates can't overload on the -// return type like this. 
-// The typedef is for the convenience of callers of this function -// (avoiding the need for a trailing return decltype) -template struct IndirectHelper { - typedef T return_type; - typedef T mutable_return_type; - static const size_t element_stride = sizeof(T); - static return_type Read(const uint8_t *p, uoffset_t i) { - return EndianScalar((reinterpret_cast(p))[i]); - } -}; -template struct IndirectHelper> { - typedef const T *return_type; - typedef T *mutable_return_type; - static const size_t element_stride = sizeof(uoffset_t); - static return_type Read(const uint8_t *p, uoffset_t i) { - p += i * sizeof(uoffset_t); - return reinterpret_cast(p + ReadScalar(p)); - } -}; -template struct IndirectHelper { - typedef const T *return_type; - typedef T *mutable_return_type; - static const size_t element_stride = sizeof(T); - static return_type Read(const uint8_t *p, uoffset_t i) { - return reinterpret_cast(p + i * sizeof(T)); - } -}; - -/// @brief Get a pointer to the the file_identifier section of the buffer. -/// @return Returns a const char pointer to the start of the file_identifier -/// characters in the buffer. The returned char * has length -/// 'flatbuffers::FlatBufferBuilder::kFileIdentifierLength'. -/// This function is UNDEFINED for FlatBuffers whose schema does not include -/// a file_identifier (likely points at padding or the start of a the root -/// vtable). -inline const char *GetBufferIdentifier(const void *buf, - bool size_prefixed = false) { - return reinterpret_cast(buf) + - ((size_prefixed) ? 2 * sizeof(uoffset_t) : sizeof(uoffset_t)); -} - -// Helper to see if the identifier in a buffer has the expected value. -inline bool BufferHasIdentifier(const void *buf, const char *identifier, - bool size_prefixed = false) { - return strncmp(GetBufferIdentifier(buf, size_prefixed), identifier, - flatbuffers::kFileIdentifierLength) == 0; -} - -/// @cond FLATBUFFERS_INTERNAL -// Helpers to get a typed pointer to the root object contained in the buffer. 
-template T *GetMutableRoot(void *buf) { - EndianCheck(); - return reinterpret_cast( - reinterpret_cast(buf) + - EndianScalar(*reinterpret_cast(buf))); -} - -template T *GetMutableSizePrefixedRoot(void *buf) { - return GetMutableRoot(reinterpret_cast(buf) + - sizeof(uoffset_t)); -} - -template const T *GetRoot(const void *buf) { - return GetMutableRoot(const_cast(buf)); -} - -template const T *GetSizePrefixedRoot(const void *buf) { - return GetRoot(reinterpret_cast(buf) + sizeof(uoffset_t)); -} - -} // namespace flatbuffers - -#endif // FLATBUFFERS_BUFFER_H_ \ No newline at end of file diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/buffer_ref.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/buffer_ref.h deleted file mode 100644 index ce302073..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/buffer_ref.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FLATBUFFERS_BUFFER_REF_H_ -#define FLATBUFFERS_BUFFER_REF_H_ - -#include "flatbuffers/base.h" -#include "flatbuffers/verifier.h" - -namespace flatbuffers { - -// Convenient way to bundle a buffer and its length, to pass it around -// typed by its root. -// A BufferRef does not own its buffer. 
-struct BufferRefBase {}; // for std::is_base_of - -template struct BufferRef : BufferRefBase { - BufferRef() : buf(nullptr), len(0), must_free(false) {} - BufferRef(uint8_t *_buf, uoffset_t _len) - : buf(_buf), len(_len), must_free(false) {} - - ~BufferRef() { - if (must_free) free(buf); - } - - const T *GetRoot() const { return flatbuffers::GetRoot(buf); } - - bool Verify() { - Verifier verifier(buf, len); - return verifier.VerifyBuffer(nullptr); - } - - uint8_t *buf; - uoffset_t len; - bool must_free; -}; - -} // namespace flatbuffers - -#endif // FLATBUFFERS_BUFFER_REF_H_ \ No newline at end of file diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/default_allocator.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/default_allocator.h deleted file mode 100644 index 975d9380..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/default_allocator.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef FLATBUFFERS_DEFAULT_ALLOCATOR_H_ -#define FLATBUFFERS_DEFAULT_ALLOCATOR_H_ - -#include "flatbuffers/allocator.h" -#include "flatbuffers/base.h" - -namespace flatbuffers { - -// DefaultAllocator uses new/delete to allocate memory regions -class DefaultAllocator : public Allocator { - public: - uint8_t *allocate(size_t size) FLATBUFFERS_OVERRIDE { - return new uint8_t[size]; - } - - void deallocate(uint8_t *p, size_t) FLATBUFFERS_OVERRIDE { delete[] p; } - - static void dealloc(void *p, size_t) { delete[] static_cast(p); } -}; - -// These functions allow for a null allocator to mean use the default allocator, -// as used by DetachedBuffer and vector_downward below. -// This is to avoid having a statically or dynamically allocated default -// allocator, or having to move it between the classes that may own it. -inline uint8_t *Allocate(Allocator *allocator, size_t size) { - return allocator->allocate(size); -} - -inline void Deallocate(Allocator *allocator, uint8_t *p, size_t size) { - allocator->deallocate(p, size); -} - -inline uint8_t *ReallocateDownward(Allocator *allocator, uint8_t *old_p, - size_t old_size, size_t new_size, - size_t in_use_back, size_t in_use_front) { - return allocator->reallocate_downward(old_p, old_size, new_size, in_use_back, - in_use_front); -} - -} // namespace flatbuffers - -#endif // FLATBUFFERS_DEFAULT_ALLOCATOR_H_ diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/detached_buffer.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/detached_buffer.h deleted file mode 100644 index 760a0884..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/detached_buffer.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FLATBUFFERS_DETACHED_BUFFER_H_ -#define FLATBUFFERS_DETACHED_BUFFER_H_ - -#include "flatbuffers/allocator.h" -#include "flatbuffers/base.h" -#include "flatbuffers/default_allocator.h" - -namespace flatbuffers { - -// DetachedBuffer is a finished flatbuffer memory region, detached from its -// builder. The original memory region and allocator are also stored so that -// the DetachedBuffer can manage the memory lifetime. -class DetachedBuffer { - public: - DetachedBuffer() - : allocator_(nullptr), - own_allocator_(false), - buf_(nullptr), - reserved_(0), - cur_(nullptr), - size_(0) {} - - DetachedBuffer(Allocator *allocator, bool own_allocator, uint8_t *buf, - size_t reserved, uint8_t *cur, size_t sz) - : allocator_(allocator), - own_allocator_(own_allocator), - buf_(buf), - reserved_(reserved), - cur_(cur), - size_(sz) {} - - DetachedBuffer(DetachedBuffer &&other) - : allocator_(other.allocator_), - own_allocator_(other.own_allocator_), - buf_(other.buf_), - reserved_(other.reserved_), - cur_(other.cur_), - size_(other.size_) { - other.reset(); - } - - DetachedBuffer &operator=(DetachedBuffer &&other) { - if (this == &other) return *this; - - destroy(); - - allocator_ = other.allocator_; - own_allocator_ = other.own_allocator_; - buf_ = other.buf_; - reserved_ = other.reserved_; - cur_ = other.cur_; - size_ = other.size_; - - other.reset(); - - return *this; - } - - ~DetachedBuffer() { destroy(); } - - const uint8_t *data() const { return cur_; } - - uint8_t *data() { return cur_; } - - size_t size() const { return size_; } - - // 
These may change access mode, leave these at end of public section - FLATBUFFERS_DELETE_FUNC(DetachedBuffer(const DetachedBuffer &other)); - FLATBUFFERS_DELETE_FUNC( - DetachedBuffer &operator=(const DetachedBuffer &other)); - - protected: - Allocator *allocator_; - bool own_allocator_; - uint8_t *buf_; - size_t reserved_; - uint8_t *cur_; - size_t size_; - - inline void destroy() { - if (buf_) Deallocate(allocator_, buf_, reserved_); - if (own_allocator_ && allocator_) { delete allocator_; } - reset(); - } - - inline void reset() { - allocator_ = nullptr; - own_allocator_ = false; - buf_ = nullptr; - reserved_ = 0; - cur_ = nullptr; - size_ = 0; - } -}; - -} // namespace flatbuffers - -#endif // FLATBUFFERS_DETACHED_BUFFER_H_ diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/string.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/string.h deleted file mode 100644 index 3db95fce..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/string.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef FLATBUFFERS_STRING_H_ -#define FLATBUFFERS_STRING_H_ - -#include "flatbuffers/base.h" -#include "flatbuffers/vector.h" - -namespace flatbuffers { - -struct String : public Vector { - const char *c_str() const { return reinterpret_cast(Data()); } - std::string str() const { return std::string(c_str(), size()); } - - // clang-format off - #ifdef FLATBUFFERS_HAS_STRING_VIEW - flatbuffers::string_view string_view() const { - return flatbuffers::string_view(c_str(), size()); - } - #endif // FLATBUFFERS_HAS_STRING_VIEW - // clang-format on - - bool operator<(const String &o) const { - return StringLessThan(this->data(), this->size(), o.data(), o.size()); - } -}; - -// Convenience function to get std::string from a String returning an empty -// string on null pointer. -static inline std::string GetString(const String *str) { - return str ? str->str() : ""; -} - -// Convenience function to get char* from a String returning an empty string on -// null pointer. -static inline const char *GetCstring(const String *str) { - return str ? str->c_str() : ""; -} - -#ifdef FLATBUFFERS_HAS_STRING_VIEW -// Convenience function to get string_view from a String returning an empty -// string_view on null pointer. -static inline flatbuffers::string_view GetStringView(const String *str) { - return str ? str->string_view() : flatbuffers::string_view(); -} -#endif // FLATBUFFERS_HAS_STRING_VIEW - -} // namespace flatbuffers - -#endif // FLATBUFFERS_STRING_H_ \ No newline at end of file diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/struct.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/struct.h deleted file mode 100644 index d8753c84..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/struct.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FLATBUFFERS_STRUCT_H_ -#define FLATBUFFERS_STRUCT_H_ - -#include "flatbuffers/base.h" - -namespace flatbuffers { - -// "structs" are flat structures that do not have an offset table, thus -// always have all members present and do not support forwards/backwards -// compatible extensions. - -class Struct FLATBUFFERS_FINAL_CLASS { - public: - template T GetField(uoffset_t o) const { - return ReadScalar(&data_[o]); - } - - template T GetStruct(uoffset_t o) const { - return reinterpret_cast(&data_[o]); - } - - const uint8_t *GetAddressOf(uoffset_t o) const { return &data_[o]; } - uint8_t *GetAddressOf(uoffset_t o) { return &data_[o]; } - - private: - // private constructor & copy constructor: you obtain instances of this - // class by pointing to existing data only - Struct(); - Struct(const Struct &); - Struct &operator=(const Struct &); - - uint8_t data_[1]; -}; - -} // namespace flatbuffers - -#endif // FLATBUFFERS_STRUCT_H_ \ No newline at end of file diff --git a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/vector_downward.h b/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/vector_downward.h deleted file mode 100644 index 33913918..00000000 --- a/code/components/tflite-lib/third_party/flatbuffers/include/flatbuffers/vector_downward.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright 2021 Google Inc. All rights reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef FLATBUFFERS_VECTOR_DOWNWARD_H_ -#define FLATBUFFERS_VECTOR_DOWNWARD_H_ - -#include "flatbuffers/base.h" -#include "flatbuffers/default_allocator.h" -#include "flatbuffers/detached_buffer.h" - -namespace flatbuffers { - -// This is a minimal replication of std::vector functionality, -// except growing from higher to lower addresses. i.e push_back() inserts data -// in the lowest address in the vector. -// Since this vector leaves the lower part unused, we support a "scratch-pad" -// that can be stored there for temporary data, to share the allocated space. -// Essentially, this supports 2 std::vectors in a single buffer. 
-class vector_downward { - public: - explicit vector_downward(size_t initial_size, Allocator *allocator, - bool own_allocator, size_t buffer_minalign) - : allocator_(allocator), - own_allocator_(own_allocator), - initial_size_(initial_size), - buffer_minalign_(buffer_minalign), - reserved_(0), - size_(0), - buf_(nullptr), - cur_(nullptr), - scratch_(nullptr) {} - - vector_downward(vector_downward &&other) - // clang-format on - : allocator_(other.allocator_), - own_allocator_(other.own_allocator_), - initial_size_(other.initial_size_), - buffer_minalign_(other.buffer_minalign_), - reserved_(other.reserved_), - size_(other.size_), - buf_(other.buf_), - cur_(other.cur_), - scratch_(other.scratch_) { - // No change in other.allocator_ - // No change in other.initial_size_ - // No change in other.buffer_minalign_ - other.own_allocator_ = false; - other.reserved_ = 0; - other.buf_ = nullptr; - other.cur_ = nullptr; - other.scratch_ = nullptr; - } - - vector_downward &operator=(vector_downward &&other) { - // Move construct a temporary and swap idiom - vector_downward temp(std::move(other)); - swap(temp); - return *this; - } - - ~vector_downward() { - clear_buffer(); - clear_allocator(); - } - - void reset() { - clear_buffer(); - clear(); - } - - void clear() { - if (buf_) { - cur_ = buf_ + reserved_; - } else { - reserved_ = 0; - cur_ = nullptr; - } - size_ = 0; - clear_scratch(); - } - - void clear_scratch() { scratch_ = buf_; } - - void clear_allocator() { - if (own_allocator_ && allocator_) { delete allocator_; } - allocator_ = nullptr; - own_allocator_ = false; - } - - void clear_buffer() { - if (buf_) Deallocate(allocator_, buf_, reserved_); - buf_ = nullptr; - } - - // Relinquish the pointer to the caller. - uint8_t *release_raw(size_t &allocated_bytes, size_t &offset) { - auto *buf = buf_; - allocated_bytes = reserved_; - offset = static_cast(cur_ - buf_); - - // release_raw only relinquishes the buffer ownership. - // Does not deallocate or reset the allocator. 
Destructor will do that. - buf_ = nullptr; - clear(); - return buf; - } - - // Relinquish the pointer to the caller. - DetachedBuffer release() { - // allocator ownership (if any) is transferred to DetachedBuffer. - DetachedBuffer fb(allocator_, own_allocator_, buf_, reserved_, cur_, - size()); - if (own_allocator_) { - allocator_ = nullptr; - own_allocator_ = false; - } - buf_ = nullptr; - clear(); - return fb; - } - - size_t ensure_space(size_t len) { - FLATBUFFERS_ASSERT(cur_ >= scratch_ && scratch_ >= buf_); - if (len > static_cast(cur_ - scratch_)) { reallocate(len); } - // Beyond this, signed offsets may not have enough range: - // (FlatBuffers > 2GB not supported). - FLATBUFFERS_ASSERT(size() < FLATBUFFERS_MAX_BUFFER_SIZE); - return len; - } - - inline uint8_t *make_space(size_t len) { - if (len) { - ensure_space(len); - cur_ -= len; - size_ += static_cast(len); - } - return cur_; - } - - // Returns nullptr if using the DefaultAllocator. - Allocator *get_custom_allocator() { return allocator_; } - - inline uoffset_t size() const { return size_; } - - uoffset_t scratch_size() const { - return static_cast(scratch_ - buf_); - } - - size_t capacity() const { return reserved_; } - - uint8_t *data() const { - FLATBUFFERS_ASSERT(cur_); - return cur_; - } - - uint8_t *scratch_data() const { - FLATBUFFERS_ASSERT(buf_); - return buf_; - } - - uint8_t *scratch_end() const { - FLATBUFFERS_ASSERT(scratch_); - return scratch_; - } - - uint8_t *data_at(size_t offset) const { return buf_ + reserved_ - offset; } - - void push(const uint8_t *bytes, size_t num) { - if (num > 0) { memcpy(make_space(num), bytes, num); } - } - - // Specialized version of push() that avoids memcpy call for small data. 
- template void push_small(const T &little_endian_t) { - make_space(sizeof(T)); - *reinterpret_cast(cur_) = little_endian_t; - } - - template void scratch_push_small(const T &t) { - ensure_space(sizeof(T)); - *reinterpret_cast(scratch_) = t; - scratch_ += sizeof(T); - } - - // fill() is most frequently called with small byte counts (<= 4), - // which is why we're using loops rather than calling memset. - void fill(size_t zero_pad_bytes) { - make_space(zero_pad_bytes); - for (size_t i = 0; i < zero_pad_bytes; i++) cur_[i] = 0; - } - - // Version for when we know the size is larger. - // Precondition: zero_pad_bytes > 0 - void fill_big(size_t zero_pad_bytes) { - memset(make_space(zero_pad_bytes), 0, zero_pad_bytes); - } - - void pop(size_t bytes_to_remove) { - cur_ += bytes_to_remove; - size_ -= static_cast(bytes_to_remove); - } - - void scratch_pop(size_t bytes_to_remove) { scratch_ -= bytes_to_remove; } - - void swap(vector_downward &other) { - using std::swap; - swap(allocator_, other.allocator_); - swap(own_allocator_, other.own_allocator_); - swap(initial_size_, other.initial_size_); - swap(buffer_minalign_, other.buffer_minalign_); - swap(reserved_, other.reserved_); - swap(size_, other.size_); - swap(buf_, other.buf_); - swap(cur_, other.cur_); - swap(scratch_, other.scratch_); - } - - void swap_allocator(vector_downward &other) { - using std::swap; - swap(allocator_, other.allocator_); - swap(own_allocator_, other.own_allocator_); - } - - private: - // You shouldn't really be copying instances of this class. - FLATBUFFERS_DELETE_FUNC(vector_downward(const vector_downward &)); - FLATBUFFERS_DELETE_FUNC(vector_downward &operator=(const vector_downward &)); - - Allocator *allocator_; - bool own_allocator_; - size_t initial_size_; - size_t buffer_minalign_; - size_t reserved_; - uoffset_t size_; - uint8_t *buf_; - uint8_t *cur_; // Points at location between empty (below) and used (above). - uint8_t *scratch_; // Points to the end of the scratchpad in use. 
- - void reallocate(size_t len) { - auto old_reserved = reserved_; - auto old_size = size(); - auto old_scratch_size = scratch_size(); - reserved_ += - (std::max)(len, old_reserved ? old_reserved / 2 : initial_size_); - reserved_ = (reserved_ + buffer_minalign_ - 1) & ~(buffer_minalign_ - 1); - if (buf_) { - buf_ = ReallocateDownward(allocator_, buf_, old_reserved, reserved_, - old_size, old_scratch_size); - } else { - buf_ = Allocate(allocator_, reserved_); - } - cur_ = buf_ + reserved_ - old_size; - scratch_ = buf_ + old_scratch_size; - } -}; - -} // namespace flatbuffers - -#endif // FLATBUFFERS_VECTOR_DOWNWARD_H_ diff --git a/code/components/tflite-lib/third_party/gemmlowp/LICENSE b/code/components/tflite-lib/third_party/gemmlowp/LICENSE deleted file mode 100644 index d6456956..00000000 --- a/code/components/tflite-lib/third_party/gemmlowp/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint.h b/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint.h deleted file mode 100644 index 51b5aff4..00000000 --- a/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint.h +++ /dev/null @@ -1,900 +0,0 @@ -// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// fixedpoint.h: fixed-point arithmetic, with basic operations and -// a few math functions such as tanh. - -#ifndef GEMMLOWP_INTERNAL_FIXEDPOINT_H_ -#define GEMMLOWP_INTERNAL_FIXEDPOINT_H_ - -#include -#include -#include -#include -#include - -#include "../internal/detect_platform.h" - -namespace gemmlowp { - -// Part 1: Low-level integer-arithmetic primitives. -// The implementations here are generic implementations valid for -// scalar types (e.g. 
std::int32_t). Architecture-specific SIMD types -// (e.g. NEON int32x4_t) may be supported by providing -// specializations for them in separate files. -// -// The purpose of these primitives is two-fold: -// - They will be used to implement higher-level fixed-point -// abstractions, namely the FixedPoint class and its arithmetic -// operators. -// - They will be directly used to implement some more involved -// fixed-point computations, e.g. the fixed-point implementation -// of math functions such as tanh. - -// Some compile-time traits around raw types to handle SIMD aspects: -// number of lanes, underlying scalar type. -template -struct FixedPointRawTypeTraits {}; - -template <> -struct FixedPointRawTypeTraits { - typedef std::int32_t ScalarRawType; - static constexpr int kLanes = 1; -}; - -template <> -struct FixedPointRawTypeTraits { - typedef std::int16_t ScalarRawType; - static constexpr int kLanes = 1; -}; - -// Returns a SIMD value duplicating a scalar value across all lanes. -template -tRawType Dup(typename FixedPointRawTypeTraits::ScalarRawType x) { - return x; -} - -// Plain bit-wise AND -template -tIntegerType BitAnd(tIntegerType a, tIntegerType b) { - return a & b; -} - -// Plain bit-wise OR -template -tIntegerType BitOr(tIntegerType a, tIntegerType b) { - return a | b; -} - -// Plain bit-wise XOR -template -tIntegerType BitXor(tIntegerType a, tIntegerType b) { - return a ^ b; -} - -// Plain bit-wise NOT -template -tIntegerType BitNot(tIntegerType a) { - return ~a; -} - -// Integer addition. Not saturating. Overflow is undefined behavior. -template -tIntegerType Add(tIntegerType a, tIntegerType b) { - return a + b; -} - -// Integer subtraction. Not saturating. Overflow is undefined behavior. -template -tIntegerType Mul(tIntegerType a, tIntegerType b) { - return a * b; -} - -template -tIntegerType Sub(tIntegerType a, tIntegerType b) { - return a - b; -} - -// Integer unary negative. Not saturating. Overflow is undefined behavior. 
-template -tIntegerType Neg(tIntegerType a) { - return -a; -} - -// Integer arithmetic left-shift, equivalent to multiplying with a power of two. -// Negative values are OK. In case of overflow, no Undefined -// Behavior, but the results are implementation-defined (in practice, -// they currently are saturated, but we make no commitment to that). The idea -// is that the caller will want to implement the overflowing cases with -// saturation with compare-and-mask, so we don't care about the results -// in the overflow case, we just want to avoid undefined behavior. -// -// tIntegerType may be int32 or any narrower signed type. -template -tIntegerType ShiftLeft(tIntegerType a, int offset) { - const std::int64_t wide_a = static_cast(a); - const std::int64_t wide_shifted = wide_a * (1 << offset); - const auto min = std::numeric_limits::min(); - const auto max = std::numeric_limits::max(); - return wide_shifted < min - ? min - : wide_shifted > max ? max - : static_cast(wide_shifted); -} - -// Integer arithmetic right-shift. Not rounding. -// Relying on implementation-defined, but in-practice-consistent, -// C++ compiler behavior. -template -tIntegerType ShiftRight(tIntegerType a, int offset) { - return a >> offset; -} - -// Each bit of the result is set to the corresponding bit of either then_val or -// else_val depending on whether the corresponding bit of if_mask is set. -// Equivalent to the VBSL instruction in ARM NEON. -template -tIntegerType SelectUsingMask(tIntegerType if_mask, tIntegerType then_val, - tIntegerType else_val) { - return BitXor(BitAnd(if_mask, then_val), BitAnd(BitNot(if_mask), else_val)); -} - -// For each input scalar, the corresponding bits of the result are set if the -// input scalar is non-zero. -template -tIntegerType MaskIfNonZero(tIntegerType a) { - static constexpr tIntegerType zero = 0; - return a ? BitNot(zero) : zero; -} - -// For each input scalar, the corresponding bits of the result are set if the -// input scalar is zero. 
-template -tIntegerType MaskIfZero(tIntegerType a) { - return MaskIfNonZero(!a); -} - -// For each pair of input scalars, the corresponding bits of the result are -// set if the input scalars are equal. -template -tIntegerType MaskIfEqual(tIntegerType a, tIntegerType b) { - return MaskIfNonZero(a == b); -} - -// For each pair of input scalars, the corresponding bits of the result are -// set if the input scalars are not equal. -template -tIntegerType MaskIfNotEqual(tIntegerType a, tIntegerType b) { - return MaskIfNonZero(a != b); -} - -// For each pair of input scalars, the corresponding bits of the result are -// set if the input scalars a, b satisfy a > b. -template -tIntegerType MaskIfGreaterThan(tIntegerType a, tIntegerType b) { - return MaskIfNonZero(a > b); -} - -// For each pair of input scalars, the corresponding bits of the result are -// set if the input scalars a, b satisfy a >= b. -template -tIntegerType MaskIfGreaterThanOrEqual(tIntegerType a, tIntegerType b) { - return MaskIfNonZero(a >= b); -} - -// For each pair of input scalars, the corresponding bits of the result are -// set if the input scalars a, b satisfy a < b. -template -tIntegerType MaskIfLessThan(tIntegerType a, tIntegerType b) { - return MaskIfNonZero(a < b); -} - -// For each pair of input scalars, the corresponding bits of the result are -// set if the input scalars a, b satisfy a <= b. -template -tIntegerType MaskIfLessThanOrEqual(tIntegerType a, tIntegerType b) { - return MaskIfNonZero(a <= b); -} - -// Returns true if all of the input scalars are nonzero. -// This function may currently assume that each of the input scalars has either -// all or none of its bits set. Otherwise, its behavior is currently undefined. -template -bool All(tIntegerType a) { - return a; -} - -// Returns true if any of the input scalars are nonzero. -// This function may currently assume that each of the input scalars has either -// all or none of its bits set. Otherwise, its behavior is currently undefined. 
-template -bool Any(tIntegerType a) { - return a; -} - -// Returns (a+b)/2, rounded to the nearest integer. -// Equivalent to VRHADD in the ARM NEON instruction set. -template -IntegerType RoundingHalfSum(IntegerType a, IntegerType b) { - static_assert(std::is_same::value, "unimplemented"); - (void)b; - return a; -} - -template <> -inline std::int32_t RoundingHalfSum(std::int32_t a, std::int32_t b) { - std::int64_t a64 = a; - std::int64_t b64 = b; - std::int64_t sum = a64 + b64; - std::int64_t sign = sum >= 0 ? 1 : -1; - return static_cast((sum + sign) / 2); -} - -template <> -inline std::int16_t RoundingHalfSum(std::int16_t a, std::int16_t b) { - std::int32_t a32 = a; - std::int32_t b32 = b; - std::int32_t sum = a32 + b32; - std::int32_t sign = sum >= 0 ? 1 : -1; - return static_cast((sum + sign) / 2); -} - -template -IntegerType SaturatingAdd(IntegerType a, IntegerType b) { - static_assert(std::is_same::value, "unimplemented"); - (void)b; - return a; -} - -// So far this is only needed for int16. -template <> -inline std::int16_t SaturatingAdd(std::int16_t a, std::int16_t b) { - std::int32_t a32 = a; - std::int32_t b32 = b; - std::int32_t sum = a32 + b32; - return static_cast( - std::min(static_cast(32767), - std::max(static_cast(-32768), sum))); -} - -// Returns a+b, saturating if the integers are 16bit or narrower, -// otherwise just a plain addition. 
-template -struct AddSaturatingIf16BitImpl { - static IntegerType Run(IntegerType a, IntegerType b) { return Add(a, b); } -}; -template -struct AddSaturatingIf16BitImpl { - static IntegerType Run(IntegerType a, IntegerType b) { - return SaturatingAdd(a, b); - } -}; -template -IntegerType AddSaturatingIf16Bit(IntegerType a, IntegerType b) { - using ScalarType = - typename FixedPointRawTypeTraits::ScalarRawType; - return AddSaturatingIf16BitImpl::Run(a, - b); -} - -// Returns the integer that represents the product of two fixed-point -// numbers, interpreting all integers as fixed-point values in the -// interval [-1, 1), rounding to the nearest value, and saturating -// -1 * -1 to the maximum value (since 1 is not in the half-open -// interval [-1, 1)). -// -// [The explanation below specializes to std::int32_t for example purpose.] -// -// The mapping between IntegerType and the interval [-1, 1) is unique and -// implied by IntegerType, which is assumed to be signed. For example, -// for IntegerType==std::int32_t, the mapping is -// real_value = integer_value / 2^31. -// So in this case, and leaving aside rounding and saturating, this -// function computes ((a / 2^31) * (b / 2^31)) * 2^31, which simplifies to -// (a * b) / 2^31. -// -// The 'doubling' part in the name of this function comes from the fact that -// this operation is very close to a "multiply-high" operation, keeping only -// the top half bits, except that that would be effectively computing -// (a * b) / 2^32, -// so here we are computing 2x that, since -// 1/2^31 = 2 * 1/2^32. -// The idea is to use all of the available 32 bits in the destination int32 -// value. -// -// [End of the explanation specializing to int32.] -// -// This is equivalent to the VQRDMULH instruction in ARM NEON. 
-template -IntegerType SaturatingRoundingDoublingHighMul(IntegerType a, IntegerType b) { - static_assert(std::is_same::value, "unimplemented"); - (void)b; - return a; -} - -// This function implements the same computation as the ARMv7 NEON VQRDMULH -// instruction. -template <> -inline std::int32_t SaturatingRoundingDoublingHighMul(std::int32_t a, - std::int32_t b) { - bool overflow = a == b && a == std::numeric_limits::min(); - std::int64_t a_64(a); - std::int64_t b_64(b); - std::int64_t ab_64 = a_64 * b_64; - std::int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); - std::int32_t ab_x2_high32 = - static_cast((ab_64 + nudge) / (1ll << 31)); - return overflow ? std::numeric_limits::max() : ab_x2_high32; -} - -template <> -inline std::int16_t SaturatingRoundingDoublingHighMul(std::int16_t a, - std::int16_t b) { - bool overflow = a == b && a == std::numeric_limits::min(); - std::int32_t a_32(a); - std::int32_t b_32(b); - std::int32_t ab_32 = a_32 * b_32; - std::int16_t nudge = ab_32 >= 0 ? (1 << 14) : (1 - (1 << 14)); - std::int16_t ab_x2_high16 = - static_cast((ab_32 + nudge) / (1 << 15)); - return overflow ? std::numeric_limits::max() : ab_x2_high16; -} - -// Correctly-rounded-to-nearest division by a power-of-two. -// Also known as a rounding arithmetic right shift. 
-template -inline IntegerType RoundingDivideByPOT(IntegerType x, int exponent) { - assert(exponent >= 0); - assert(exponent <= 31); - const IntegerType mask = Dup((1ll << exponent) - 1); - const IntegerType zero = Dup(0); - const IntegerType one = Dup(1); - const IntegerType remainder = BitAnd(x, mask); - const IntegerType threshold = - Add(ShiftRight(mask, 1), BitAnd(MaskIfLessThan(x, zero), one)); - return Add(ShiftRight(x, exponent), - BitAnd(MaskIfGreaterThan(remainder, threshold), one)); -} - -// Returns the product of a run-time integer value by a compile-time power -// of two, with either a positive exponent (equivalent to an arithmetic -// left shift, saturating) or a negative exponent (equivalent to an arithmetic -// right shift, rounding to nearest). -template 0 ? 1 : Exponent < 0 ? -1 : 0)> -struct ImplSaturatingRoundingMultiplyByPOT {}; - -template -struct ImplSaturatingRoundingMultiplyByPOT { - static IntegerType eval(IntegerType x) { return x; } -}; - -template -struct ImplSaturatingRoundingMultiplyByPOT { - static IntegerType eval(IntegerType x) { - using ScalarIntegerType = - typename FixedPointRawTypeTraits::ScalarRawType; - const IntegerType min = - Dup(std::numeric_limits::min()); - const IntegerType max = - Dup(std::numeric_limits::max()); - const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType); - - const std::int32_t threshold = - ((1 << (ScalarIntegerTypeBits - 1 - Exponent)) - 1); - const IntegerType positive_mask = - MaskIfGreaterThan(x, Dup(threshold)); - const IntegerType negative_mask = - MaskIfLessThan(x, Dup(-threshold)); - - IntegerType result = ShiftLeft(x, Exponent); - result = SelectUsingMask(positive_mask, max, result); - result = SelectUsingMask(negative_mask, min, result); - return result; - } -}; - -template -struct ImplSaturatingRoundingMultiplyByPOT { - static IntegerType eval(IntegerType x) { - return RoundingDivideByPOT(x, -Exponent); - } -}; - -template -IntegerType SaturatingRoundingMultiplyByPOT(IntegerType x) { 
- return ImplSaturatingRoundingMultiplyByPOT::eval(x); -} - -// Part 2: the FixedPoint class. - -// A FixedPoint object represents a fixed-point value stored in the underlying -// integer type tRawType, if tRawType is a plain scalar integer type. -// Alternatively, tRawType may be a SIMD type (e.g. NEON int32x4_t) in which -// case a FixedPoint object represents a corresponding SIMD vector of fixed -// point values. -// -// tIntegerBits describes the range of the fixed-point format: if -// tIntegerBits == m then the range of representable values is the half-open -// interval [-2^m; 2^m) where the open boundary on the right side means that -// 2^m is not representable (how close the maximum representable value is to -// it, depends on bit-depth of tRawType). -// -// In "Q format notation", -// https://en.wikipedia.org/wiki/Q_(number_format) -// we are describing the format -// Qm.n -// where -// m = tIntegerBits -// and -// n = NumberOfBits(tRawType) - (m + 1) -// Note that the (m + 1) in the above line is because we adopt the convention -// that we count the integer bits exclusively of the sign bit; so (m + 1) is -// the total number of integer bits inclusive of the sign bit. -// -// Accordingly, the number of integral representable values in our range -// [-2^m ; 2^m) -// is equal to 2^(m+1). 
-template -class FixedPoint { - public: - typedef tRawType RawType; - - typedef FixedPointRawTypeTraits RawTypeTraits; - typedef typename RawTypeTraits::ScalarRawType ScalarRawType; - - static constexpr int kTotalBits = 8 * sizeof(ScalarRawType); - static constexpr int kIntegerBits = tIntegerBits; - static constexpr int kFractionalBits = kTotalBits - 1 - kIntegerBits; - static_assert(kIntegerBits >= 0 && kIntegerBits < kTotalBits, - "bad IntegerBits"); - - typedef FixedPoint ScalarFixedPointType; - - static const ScalarRawType ScalarRawMin() { - return std::numeric_limits::min(); - } - - static const ScalarRawType ScalarRawMax() { - return std::numeric_limits::max(); - } - - static const ScalarRawType RawMin() { - return VectorFromScalar(ScalarRawMin()); - } - - static const ScalarRawType RawMax() { - return VectorFromScalar(ScalarRawMax()); - } - - static FixedPoint FromRaw(RawType x) { - FixedPoint retval; - retval.raw() = x; - return retval; - } - - static FixedPoint FromScalarRaw(ScalarRawType x) { - FixedPoint retval; - retval.raw() = Dup(x); - return retval; - } - - static FixedPoint FromScalarFixedPoint(ScalarFixedPointType x) { - return FromScalarRaw(x.raw()); - } - - template - static FixedPoint ConstantPOT() { - static constexpr int kOffset = kFractionalBits + Exponent; - static_assert( - kOffset < 31, - "Constant not exactly representable in this fixed-point format"); - return FromScalarRaw(ScalarRawType(1) << kOffset); - } - - static FixedPoint Zero() { return FromScalarRaw(0); } - - static FixedPoint One() { - return FromScalarRaw( - kIntegerBits == 0 - ? ScalarRawMax() - : (ScalarRawType(1) << (kIntegerBits == 0 ? 
0 : kFractionalBits))); - } - - static FixedPoint FromDouble(double x) { - const double min_bound = static_cast(ScalarRawMin()); - const double max_bound = static_cast(ScalarRawMax()); - return FromScalarRaw(static_cast(std::min( - std::max(round(x * static_cast(1ll << kFractionalBits)), - min_bound), - max_bound))); - } - - RawType raw() const { return i_; } - RawType& raw() { return i_; } - - private: - RawType i_; -}; - -// Part 3: implementation of arithmetic operators for the -// FixedPoint class, and a few related functions. - -// A FixedPoint multiplication is just a -// SaturatingRoundingDoublingHighMul operation on the underlying -// raw integer values. The IntegerBits simply add up, as is obvious -// from the fact that the range is [-2^IntegerBits, 2^IntegerBits). -template -FixedPoint operator*( - FixedPoint a, - FixedPoint b) { - FixedPoint c; - c.raw() = SaturatingRoundingDoublingHighMul(a.raw(), b.raw()); - return c; -} - -// Tweaking IntegerBits gives exact multiplication by a power of two. -template -FixedPoint ExactMulByPot( - FixedPoint a) { - FixedPoint c; - c.raw() = a.raw(); - return c; -} - -// If we want to leave IntegerBits fixed, then multiplication -// by a power of two has to be saturating/rounding, not exact anymore. -template -FixedPoint SaturatingRoundingMultiplyByPOT( - FixedPoint a) { - return FixedPoint::FromRaw( - SaturatingRoundingMultiplyByPOT(a.raw())); -} - -// Generic arithmetic operators. 
- -#define MAKE_FIXEDPOINT_UNARY_FUNC(FuncName, ImplFuncName) \ - template \ - FixedPoint FuncName( \ - FixedPoint a) { \ - return FixedPoint::FromRaw(ImplFuncName(a.raw())); \ - } - -#define MAKE_FIXEDPOINT_BINARY_FUNC(FuncName, ImplFuncName) \ - template \ - FixedPoint FuncName( \ - FixedPoint a, \ - FixedPoint b) { \ - return FixedPoint::FromRaw( \ - ImplFuncName(a.raw(), b.raw())); \ - } - -MAKE_FIXEDPOINT_UNARY_FUNC(operator-, Neg) -MAKE_FIXEDPOINT_UNARY_FUNC(operator~, BitNot) -MAKE_FIXEDPOINT_BINARY_FUNC(operator+, Add) -MAKE_FIXEDPOINT_BINARY_FUNC(operator-, Sub) -MAKE_FIXEDPOINT_BINARY_FUNC(operator&, BitAnd) -MAKE_FIXEDPOINT_BINARY_FUNC(operator^, BitXor) -MAKE_FIXEDPOINT_BINARY_FUNC(operator|, BitOr) -MAKE_FIXEDPOINT_BINARY_FUNC(RoundingHalfSum, RoundingHalfSum) - -#undef MAKE_FIXEDPOINT_UNARY_FUNC -#undef MAKE_FIXEDPOINT_BINARY_FUNC - -#define MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(FuncName) \ - template \ - tRawType FuncName(FixedPoint a) { \ - return FuncName(a.raw()); \ - } - -#define MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(FuncName) \ - template \ - tRawType FuncName(FixedPoint a, \ - FixedPoint b) { \ - return FuncName(a.raw(), b.raw()); \ - } - -MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(MaskIfZero) -MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(MaskIfNonZero) -MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfEqual) -MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfNotEqual) -MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfGreaterThan) -MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfGreaterThanOrEqual) -MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfLessThan) -MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfLessThanOrEqual) - -#undef MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW -#undef MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW - -template -FixedPoint SelectUsingMask( - tRawType if_mask, FixedPoint then_val, - FixedPoint else_val) { - return FixedPoint::FromRaw( - SelectUsingMask(if_mask, then_val.raw(), else_val.raw())); -} - -template -bool 
operator==(FixedPoint a, - FixedPoint b) { - return All(MaskIfEqual(a.raw(), b.raw())); -} - -template -bool operator!=(FixedPoint a, - FixedPoint b) { - return !(a == b); -} - -template -FixedPoint SaturatingAdd( - FixedPoint a, - FixedPoint b) { - return FixedPoint::FromRaw( - SaturatingAdd(a.raw(), b.raw())); -} - -template -FixedPoint AddSaturatingIf16Bit( - FixedPoint a, - FixedPoint b) { - return FixedPoint::FromRaw( - AddSaturatingIf16Bit(a.raw(), b.raw())); -} - -// Conversion to floating-point. -template -double ToDouble(FixedPoint x) { - static_assert(FixedPointRawTypeTraits::kLanes == 1, - "not applicable to SIMD types"); - typedef FixedPoint F; - return x.raw() / static_cast(1ll << F::kFractionalBits); -} - -// Rescale changes the number of IntegerBits and updates the underlying -// raw integer value accordingly. -template -FixedPoint Rescale( - FixedPoint x) { - static constexpr int kExponent = tIntegerBitsSrc - tIntegerBitsDst; - FixedPoint result; - result.raw() = SaturatingRoundingMultiplyByPOT(x.raw()); - return result; -} - -// CheckedFixedPointConstant allows to specify fixed-point constants -// initialized as real numbers, in a way that does not compile floating-point -// arithmetic in production code, yet still checks agreement with the -// floating-point expressions when asserts are enabled. -// -// The raw integer value provided is always a int32, encoding a 32-bit -// fixed-point value, regardless of the actual Scalar type. This allows -// writing generic code that applies just as well to the 32-bit and 16-bit -// cases. In the 16-bit case, the raw integer value is internally -// rounding-shifted by 16 bits to the right. 
-template -inline typename FixedPointType::ScalarRawType RescaleConstantInitializer( - std::int32_t int32_value) { - typedef typename FixedPointType::ScalarRawType ScalarRawType; - static constexpr int ScalarTypeBits = 8 * sizeof(ScalarRawType); - return static_cast( - RoundingDivideByPOT(int32_value, 32 - ScalarTypeBits)); -} -#ifdef GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS -template -FixedPointType CheckedFixedPointConstant(std::int32_t raw_value, - double double_value) { - const FixedPointType result = FixedPointType::FromScalarRaw(raw_value); - assert(result == FixedPointType::FromDouble(double_value)); - return result; -} -#define GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPointType, \ - ScalarRawInt32Value, DoubleValue) \ - (gemmlowp::CheckedFixedPointConstant( \ - gemmlowp::RescaleConstantInitializer( \ - ScalarRawInt32Value), \ - DoubleValue)) - -#else -#define GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPointType, \ - ScalarRawInt32Value, DoubleValue) \ - (FixedPointType::FromScalarRaw( \ - gemmlowp::RescaleConstantInitializer( \ - ScalarRawInt32Value))) -#endif - -// Implementation of exponential function. - -// Returns exp(x) for x in [-1/4, 0). -template -FixedPoint exp_on_interval_between_negative_one_quarter_and_0_excl( - FixedPoint a) { - typedef FixedPoint F; - const F constant_term = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F, 1895147668, std::exp(-1.0 / 8.0)); - const F constant_1_over_3 = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F, 715827883, 1.0 / 3.0); - // We're evaluating a Taylor expansion around -1/8, so we do the change of - // variable: x = a + 1/8. - // In fixed-point with 0 integer bits, 1/8 is represented by 1 << 28. 
- F x = a + F::template ConstantPOT<-3>(); - F x2 = x * x; - F x3 = x2 * x; - F x4 = x2 * x2; - F x4_over_4 = SaturatingRoundingMultiplyByPOT<-2>(x4); - F x4_over_24_plus_x3_over_6_plus_x2_over_2 = - SaturatingRoundingMultiplyByPOT<-1>( - ((x4_over_4 + x3) * constant_1_over_3) + x2); - return AddSaturatingIf16Bit( - constant_term, - constant_term * (x + x4_over_24_plus_x3_over_6_plus_x2_over_2)); -} - -// Returns exp(x) for x < 0. -template -FixedPoint exp_on_negative_values( - FixedPoint a) { - typedef FixedPoint InputF; - typedef FixedPoint ResultF; - static constexpr int kFractionalBits = InputF::kFractionalBits; - static constexpr int kIntegerBits = InputF::kIntegerBits; - const InputF kOneQuarter = InputF::template ConstantPOT<-2>(); - InputF mask = kOneQuarter - InputF::FromScalarRaw(1); - InputF a_mod_quarter_minus_one_quarter = (a & mask) - kOneQuarter; - ResultF result = exp_on_interval_between_negative_one_quarter_and_0_excl( - Rescale<0>(a_mod_quarter_minus_one_quarter)); - tRawType remainder = (a_mod_quarter_minus_one_quarter - a).raw(); - -#define GEMMLOWP_EXP_BARREL_SHIFTER(Exponent, FixedPointMultiplier) \ - if (kIntegerBits > Exponent) { \ - const ResultF kMultiplier = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( \ - ResultF, FixedPointMultiplier, std::exp(-std::pow(2.0, Exponent))); \ - static constexpr int kShiftAmount = \ - kIntegerBits > Exponent ? kFractionalBits + Exponent : 0; \ - result = SelectUsingMask( \ - MaskIfNonZero(BitAnd(remainder, Dup(1 << kShiftAmount))), \ - result * kMultiplier, result); \ - } - - GEMMLOWP_EXP_BARREL_SHIFTER(-2, 1672461947); - GEMMLOWP_EXP_BARREL_SHIFTER(-1, 1302514674); - GEMMLOWP_EXP_BARREL_SHIFTER(+0, 790015084); - GEMMLOWP_EXP_BARREL_SHIFTER(+1, 290630308); - GEMMLOWP_EXP_BARREL_SHIFTER(+2, 39332535); - GEMMLOWP_EXP_BARREL_SHIFTER(+3, 720401); - GEMMLOWP_EXP_BARREL_SHIFTER(+4, 242); - -#undef GEMMLOWP_EXP_BARREL_SHIFTER - - static constexpr int clampB = kIntegerBits > 5 ? 
36 - kIntegerBits : 0; - if (kIntegerBits > 5) { - const InputF clamp = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(InputF, -(1 << clampB), -32.0); - result = SelectUsingMask(MaskIfLessThan(a, clamp), ResultF::Zero(), result); - } - - result = SelectUsingMask(MaskIfZero(a), ResultF::One(), result); - return result; -} - -// Implementation of tanh: (1 - exp(-2x)) / (1 + exp(-2x)). - -// Returns (1 - x) / (1 + x) for x in (0, 1). -template -FixedPoint one_minus_x_over_one_plus_x_for_x_in_0_1( - FixedPoint a) { - typedef FixedPoint F0; - typedef FixedPoint F2; - F0 half_denominator = RoundingHalfSum(a, F0::One()); - // Newton-Raphson division - // https://en.wikipedia.org/wiki/Division_algorithm#Newton.E2.80.93Raphson_division - // Refer to that page for the logic behind the 48/17 and 32/17 constants. - const F2 constant_48_over_17 = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, 1515870810, 48.0 / 17.0); - const F2 constant_neg_32_over_17 = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, -1010580540, -32.0 / 17.0); - F2 x = constant_48_over_17 + half_denominator * constant_neg_32_over_17; - for (int i = 0; i < 3; i++) { - F2 half_denominator_times_x = half_denominator * x; - F2 one_minus_half_denominator_times_x = - F2::One() - half_denominator_times_x; - x = x + Rescale<2>(x * one_minus_half_denominator_times_x); - } - return Rescale<0>(x - F2::One()); -} - -// Returns -tanh(x) for x < 0. -template -FixedPoint neg_tanh_on_negative_values( - FixedPoint a) { - return one_minus_x_over_one_plus_x_for_x_in_0_1( - exp_on_negative_values(ExactMulByPot<1>(a))); -} - -// Returns tanh(x) for any x. 
-template -FixedPoint tanh(FixedPoint a) { - typedef FixedPoint InputF; - typedef FixedPoint ResultF; - tRawType mask_if_negative = MaskIfLessThan(a, InputF::Zero()); - tRawType mask_if_zero = MaskIfZero(a); - InputF n = SelectUsingMask(mask_if_negative, a, -a); - ResultF t = neg_tanh_on_negative_values(n); - return SelectUsingMask(mask_if_zero, ResultF::Zero(), - SelectUsingMask(mask_if_negative, -t, t)); -} - -// Implementation of logistic function. - -// Returns 1 / (1 + x) for x in (0, 1). -template -FixedPoint one_over_one_plus_x_for_x_in_0_1( - FixedPoint a) { - typedef FixedPoint F0; - typedef FixedPoint F2; - F0 half_denominator = RoundingHalfSum(a, F0::One()); - // Newton-Raphson division - // https://en.wikipedia.org/wiki/Division_algorithm#Newton.E2.80.93Raphson_division - // Refer to that page for the logic behind the 48/17 and 32/17 constants. - const F2 constant_48_over_17 = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, 1515870810, 48.0 / 17.0); - const F2 constant_neg_32_over_17 = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, -1010580540, -32.0 / 17.0); - F2 x = constant_48_over_17 + half_denominator * constant_neg_32_over_17; - for (int i = 0; i < 3; i++) { - F2 half_denominator_times_x = half_denominator * x; - F2 one_minus_half_denominator_times_x = - F2::One() - half_denominator_times_x; - x = x + Rescale<2>(x * one_minus_half_denominator_times_x); - } - return Rescale<0>(ExactMulByPot<-1>(x)); -} - -// Returns logistic(x) = 1 / (1 + exp(-x)) for x > 0. -template -FixedPoint logistic_on_positive_values( - FixedPoint a) { - return one_over_one_plus_x_for_x_in_0_1(exp_on_negative_values(-a)); -} - -// Returns logistic(x) = 1 / (1 + exp(-x)) for any x. 
-template -FixedPoint logistic(FixedPoint a) { - typedef FixedPoint InputF; - typedef FixedPoint ResultF; - tRawType mask_if_positive = MaskIfGreaterThan(a, InputF::Zero()); - tRawType mask_if_zero = MaskIfZero(a); - InputF abs_input = SelectUsingMask(mask_if_positive, a, -a); - ResultF result_if_positive = logistic_on_positive_values(abs_input); - ResultF result_if_negative = ResultF::One() - result_if_positive; - const ResultF one_half = - GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(ResultF, 1 << 30, 0.5); - return SelectUsingMask(mask_if_zero, one_half, - SelectUsingMask(mask_if_positive, result_if_positive, - result_if_negative)); -} - -} // end namespace gemmlowp - -#ifdef GEMMLOWP_NEON -#include "./fixedpoint_neon.h" -#elif defined(GEMMLOWP_AVX2) -#include "./fixedpoint_avx.h" -#elif defined(GEMMLOWP_SSE4) -#include "./fixedpoint_sse.h" -#elif defined(GEMMLOWP_MSA) -#include "./fixedpoint_msa.h" -#endif - -#endif // GEMMLOWP_INTERNAL_FIXEDPOINT_H_ diff --git a/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint_neon.h b/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint_neon.h deleted file mode 100644 index 646c5907..00000000 --- a/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint_neon.h +++ /dev/null @@ -1,331 +0,0 @@ -// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// fixedpoint_neon.h: optimized NEON specializations of the templates -// in fixedpoint.h. - -#ifndef GEMMLOWP_INTERNAL_FIXEDPOINT_NEON_H_ -#define GEMMLOWP_INTERNAL_FIXEDPOINT_NEON_H_ - -#include - -namespace gemmlowp { - -template <> -struct FixedPointRawTypeTraits { - typedef std::int32_t ScalarRawType; - static constexpr int kLanes = 4; -}; - -template <> -struct FixedPointRawTypeTraits { - typedef std::int16_t ScalarRawType; - static constexpr int kLanes = 8; -}; - -template <> -inline int32x4_t BitAnd(int32x4_t a, int32x4_t b) { - return vandq_s32(a, b); -} - -template <> -inline int16x8_t BitAnd(int16x8_t a, int16x8_t b) { - return vandq_s16(a, b); -} - -template <> -inline int32x4_t BitOr(int32x4_t a, int32x4_t b) { - return vorrq_s32(a, b); -} - -template <> -inline int16x8_t BitOr(int16x8_t a, int16x8_t b) { - return vorrq_s16(a, b); -} - -template <> -inline int32x4_t BitXor(int32x4_t a, int32x4_t b) { - return veorq_s32(a, b); -} - -template <> -inline int16x8_t BitXor(int16x8_t a, int16x8_t b) { - return veorq_s16(a, b); -} - -template <> -inline int32x4_t BitNot(int32x4_t a) { - return veorq_s32(a, vdupq_n_s32(-1)); -} - -template <> -inline int16x8_t BitNot(int16x8_t a) { - return veorq_s16(a, vdupq_n_s16(-1)); -} - -template <> -inline int32x4_t Add(int32x4_t a, int32x4_t b) { - return vaddq_s32(a, b); -} - -template <> -inline int16x8_t Add(int16x8_t a, int16x8_t b) { - return vaddq_s16(a, b); -} - -template <> -inline int32x4_t Sub(int32x4_t a, int32x4_t b) { - return vsubq_s32(a, b); -} - -template <> -inline int16x8_t Sub(int16x8_t a, int16x8_t b) { - return vsubq_s16(a, b); -} - -template <> -inline int32x4_t Neg(int32x4_t a) { - return vnegq_s32(a); -} - -template <> -inline int16x8_t Neg(int16x8_t a) { - return vnegq_s16(a); -} - -template <> -inline int32x4_t ShiftLeft(int32x4_t a, int offset) { - return vshlq_s32(a, vdupq_n_s32(offset)); -} - -template <> -inline int16x8_t ShiftLeft(int16x8_t a, int offset) { - return vshlq_s16(a, 
vdupq_n_s16(offset)); -} - -template <> -inline int32x4_t ShiftRight(int32x4_t a, int offset) { - return vshlq_s32(a, vdupq_n_s32(-offset)); -} - -template <> -inline int16x8_t ShiftRight(int16x8_t a, int offset) { - return vshlq_s16(a, vdupq_n_s16(-offset)); -} - -template <> -inline int32x4_t SelectUsingMask(int32x4_t if_mask, int32x4_t then_val, - int32x4_t else_val) { - return vbslq_s32(vreinterpretq_u32_s32(if_mask), then_val, else_val); -} - -template <> -inline int16x8_t SelectUsingMask(int16x8_t if_mask, int16x8_t then_val, - int16x8_t else_val) { - return vbslq_s16(vreinterpretq_u16_s16(if_mask), then_val, else_val); -} - -template <> -inline int32x4_t MaskIfEqual(int32x4_t a, int32x4_t b) { - return vreinterpretq_s32_u32(vceqq_s32(a, b)); -} - -template <> -inline int16x8_t MaskIfEqual(int16x8_t a, int16x8_t b) { - return vreinterpretq_s16_u16(vceqq_s16(a, b)); -} - -template <> -inline int32x4_t MaskIfNotEqual(int32x4_t a, int32x4_t b) { - return BitNot(MaskIfEqual(a, b)); -} - -template <> -inline int16x8_t MaskIfNotEqual(int16x8_t a, int16x8_t b) { - return BitNot(MaskIfEqual(a, b)); -} - -template <> -inline int32x4_t MaskIfZero(int32x4_t a) { - return MaskIfEqual(a, vdupq_n_s32(0)); -} - -template <> -inline int16x8_t MaskIfZero(int16x8_t a) { - return MaskIfEqual(a, vdupq_n_s16(0)); -} - -template <> -inline int32x4_t MaskIfNonZero(int32x4_t a) { - return vreinterpretq_s32_u32(vtstq_s32(a, a)); -} - -template <> -inline int16x8_t MaskIfNonZero(int16x8_t a) { - return vreinterpretq_s16_u16(vtstq_s16(a, a)); -} - -template <> -inline int32x4_t MaskIfGreaterThan(int32x4_t a, int32x4_t b) { - return vreinterpretq_s32_u32(vcgtq_s32(a, b)); -} - -template <> -inline int16x8_t MaskIfGreaterThan(int16x8_t a, int16x8_t b) { - return vreinterpretq_s16_u16(vcgtq_s16(a, b)); -} - -template <> -inline int32x4_t MaskIfGreaterThanOrEqual(int32x4_t a, int32x4_t b) { - return vreinterpretq_s32_u32(vcgeq_s32(a, b)); -} - -template <> -inline int16x8_t 
MaskIfGreaterThanOrEqual(int16x8_t a, int16x8_t b) { - return vreinterpretq_s16_u16(vcgeq_s16(a, b)); -} - -template <> -inline int32x4_t MaskIfLessThan(int32x4_t a, int32x4_t b) { - return vreinterpretq_s32_u32(vcltq_s32(a, b)); -} - -template <> -inline int16x8_t MaskIfLessThan(int16x8_t a, int16x8_t b) { - return vreinterpretq_s16_u16(vcltq_s16(a, b)); -} - -template <> -inline int32x4_t MaskIfLessThanOrEqual(int32x4_t a, int32x4_t b) { - return vreinterpretq_s32_u32(vcleq_s32(a, b)); -} - -template <> -inline int16x8_t MaskIfLessThanOrEqual(int16x8_t a, int16x8_t b) { - return vreinterpretq_s16_u16(vcleq_s16(a, b)); -} - -template <> -inline bool All(int32x4_t a) { - a = vandq_s32(a, vextq_s32(a, a, 1)); - a = vandq_s32(a, vextq_s32(a, a, 2)); - return vgetq_lane_s32(a, 0); -} - -template <> -inline bool All(int16x8_t a) { - a = vandq_s16(a, vextq_s16(a, a, 1)); - a = vandq_s16(a, vextq_s16(a, a, 2)); - a = vandq_s16(a, vextq_s16(a, a, 4)); - return vgetq_lane_s16(a, 0); -} - -template <> -inline bool Any(int32x4_t a) { - a = vorrq_s32(a, vextq_s32(a, a, 1)); - a = vorrq_s32(a, vextq_s32(a, a, 2)); - return vgetq_lane_s32(a, 0); -} - -template <> -inline bool Any(int16x8_t a) { - a = vorrq_s16(a, vextq_s16(a, a, 1)); - a = vorrq_s16(a, vextq_s16(a, a, 2)); - a = vorrq_s16(a, vextq_s16(a, a, 4)); - return vgetq_lane_s16(a, 0); -} - -template <> -inline int32x4_t RoundingHalfSum(int32x4_t a, int32x4_t b) { - return vrhaddq_s32(a, b); -} - -template <> -inline int16x8_t RoundingHalfSum(int16x8_t a, int16x8_t b) { - return vrhaddq_s16(a, b); -} - -template <> -inline int32x4_t SaturatingRoundingDoublingHighMul(int32x4_t a, int32x4_t b) { - return vqrdmulhq_s32(a, b); -} - -template <> -inline int16x8_t SaturatingRoundingDoublingHighMul(int16x8_t a, int16x8_t b) { - return vqrdmulhq_s16(a, b); -} - -template <> -inline int32x4_t RoundingDivideByPOT(int32x4_t x, int exponent) { - const int32x4_t shift_vec = vdupq_n_s32(-exponent); - const int32x4_t fixup = 
vshrq_n_s32(vandq_s32(x, shift_vec), 31); - const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); - return vrshlq_s32(fixed_up_x, shift_vec); -} - -template <> -inline int16x8_t RoundingDivideByPOT(int16x8_t x, int exponent) { - const int16x8_t shift_vec = vdupq_n_s16(-exponent); - const int16x8_t fixup = vshrq_n_s16(vandq_s16(x, shift_vec), 15); - const int16x8_t fixed_up_x = vqaddq_s16(x, fixup); - return vrshlq_s16(fixed_up_x, shift_vec); -} - -template -struct ImplSaturatingRoundingMultiplyByPOT { - static int32x4_t eval(int32x4_t x) { return vqshlq_n_s32(x, Exponent); } -}; - -template -struct ImplSaturatingRoundingMultiplyByPOT { - static int32x4_t eval(int32x4_t x) { - const int32x4_t fixup = vshrq_n_s32(x, 31); - const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); - return vrshrq_n_s32(fixed_up_x, -Exponent); - } -}; - -template -struct ImplSaturatingRoundingMultiplyByPOT { - static int16x8_t eval(int16x8_t x) { return vqshlq_n_s16(x, Exponent); } -}; - -template -struct ImplSaturatingRoundingMultiplyByPOT { - static int16x8_t eval(int16x8_t x) { - const int16x8_t fixup = vshrq_n_s16(x, 15); - const int16x8_t fixed_up_x = vqaddq_s16(x, fixup); - return vrshrq_n_s16(fixed_up_x, -Exponent); - } -}; - -template <> -inline int32x4_t Dup(std::int32_t x) { - return vdupq_n_s32(x); -} - -template <> -inline int16x8_t Dup(std::int16_t x) { - return vdupq_n_s16(x); -} - -// So far this is only needed for int16. -template <> -inline int16x8_t SaturatingAdd(int16x8_t a, int16x8_t b) { - return vqaddq_s16(a, b); -} - -} // end namespace gemmlowp - -#endif // GEMMLOWP_INTERNAL_FIXEDPOINT_NEON_H_ diff --git a/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint_sse.h b/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint_sse.h deleted file mode 100644 index a1fae32d..00000000 --- a/code/components/tflite-lib/third_party/gemmlowp/fixedpoint/fixedpoint_sse.h +++ /dev/null @@ -1,384 +0,0 @@ -// Copyright 2015 Google Inc. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// fixedpoint_SSE.h: optimized SSE specializations of the templates -// in fixedpoint.h. - -#ifndef GEMMLOWP_INTERNAL_FIXEDPOINT_SSE_H_ -#define GEMMLOWP_INTERNAL_FIXEDPOINT_SSE_H_ - -#include -#include "fixedpoint.h" - -namespace gemmlowp { - -// SSE intrinsics are not finely typed: there is a single __m128i vector -// type that does not distinguish between "int32x4" and "int16x8" use -// cases, unlike the NEON equivalents. Because we had initially focused -// on int32x4, we did not pay attention and specialized these fixedpoint -// templates directly for __m128i hardcoding the int32x4 semantics, -// not leaving room for int16x8 semantics. Amending that by adding a separate -// data type, int16x8_m128i, that wraps __m128i while being a separate -// type. 
-struct int16x8_m128i { - int16x8_m128i() {} - explicit int16x8_m128i(__m128i w) : v(w) {} - ~int16x8_m128i() {} - - __m128i v; -}; - -template <> -struct FixedPointRawTypeTraits<__m128i> { - typedef std::int32_t ScalarRawType; - static constexpr int kLanes = 4; -}; - -template <> -struct FixedPointRawTypeTraits { - typedef std::int16_t ScalarRawType; - static constexpr int kLanes = 8; -}; - -template <> -inline __m128i BitAnd(__m128i a, __m128i b) { - return _mm_and_si128(a, b); -} - -template <> -inline int16x8_m128i BitAnd(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_and_si128(a.v, b.v)); -} - -template <> -inline __m128i BitOr(__m128i a, __m128i b) { - return _mm_or_si128(a, b); -} - -template <> -inline int16x8_m128i BitOr(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_or_si128(a.v, b.v)); -} - -template <> -inline __m128i BitXor(__m128i a, __m128i b) { - return _mm_xor_si128(a, b); -} - -template <> -inline int16x8_m128i BitXor(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_xor_si128(a.v, b.v)); -} - -template <> -inline __m128i BitNot(__m128i a) { - return _mm_andnot_si128(a, _mm_set1_epi32(-1)); -} - -template <> -inline int16x8_m128i BitNot(int16x8_m128i a) { - return int16x8_m128i(_mm_andnot_si128(a.v, _mm_set1_epi16(-1))); -} - -template <> -inline __m128i Add(__m128i a, __m128i b) { - return _mm_add_epi32(a, b); -} - -template <> -inline int16x8_m128i Add(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_add_epi16(a.v, b.v)); -} - -template <> -inline __m128i Mul(__m128i a, __m128i b) { - return _mm_mullo_epi32(a, b); -} - -template <> -inline int16x8_m128i Mul(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_mullo_epi16(a.v, b.v)); -} - -template <> -inline __m128i Sub(__m128i a, __m128i b) { - return _mm_sub_epi32(a, b); -} - -template <> -inline int16x8_m128i Sub(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_sub_epi16(a.v, b.v)); -} - -template <> -inline 
__m128i Neg(__m128i a) { - return _mm_sign_epi32(a, _mm_set1_epi32(-1)); -} - -template <> -inline int16x8_m128i Neg(int16x8_m128i a) { - return int16x8_m128i(_mm_sign_epi16(a.v, _mm_set1_epi16(-1))); -} - -template <> -inline __m128i ShiftLeft(__m128i a, int offset) { - return _mm_slli_epi32(a, offset); -} - -template <> -inline int16x8_m128i ShiftLeft(int16x8_m128i a, int offset) { - return int16x8_m128i(_mm_slli_epi16(a.v, offset)); -} - -template <> -inline __m128i ShiftRight(__m128i a, int offset) { - return _mm_srai_epi32(a, offset); -} - -template <> -inline int16x8_m128i ShiftRight(int16x8_m128i a, int offset) { - return int16x8_m128i(_mm_srai_epi16(a.v, offset)); -} - -template <> -inline __m128i SelectUsingMask(__m128i if_mask, __m128i then_val, - __m128i else_val) { - // borrowed from Intel's arm_neon_sse.h header. - return _mm_or_si128(_mm_and_si128(if_mask, then_val), - _mm_andnot_si128(if_mask, else_val)); -} - -template <> -inline int16x8_m128i SelectUsingMask(int16x8_m128i if_mask, - int16x8_m128i then_val, - int16x8_m128i else_val) { - // borrowed from Intel's arm_neon_sse.h header. 
- return int16x8_m128i(SelectUsingMask(if_mask.v, then_val.v, else_val.v)); -} - -template <> -inline __m128i MaskIfEqual(__m128i a, __m128i b) { - return _mm_cmpeq_epi32(a, b); -} - -template <> -inline int16x8_m128i MaskIfEqual(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_cmpeq_epi16(a.v, b.v)); -} - -template <> -inline __m128i MaskIfNotEqual(__m128i a, __m128i b) { - return BitNot(MaskIfEqual(a, b)); -} - -template <> -inline int16x8_m128i MaskIfNotEqual(int16x8_m128i a, int16x8_m128i b) { - return BitNot(MaskIfEqual(a, b)); -} - -template <> -inline __m128i MaskIfZero(__m128i a) { - return MaskIfEqual(a, _mm_set1_epi32(0)); -} - -template <> -inline int16x8_m128i MaskIfZero(int16x8_m128i a) { - return MaskIfEqual(a, int16x8_m128i(_mm_set1_epi16(0))); -} - -template <> -inline __m128i MaskIfNonZero(__m128i a) { - return MaskIfNotEqual(a, _mm_set1_epi32(0)); -} - -template <> -inline int16x8_m128i MaskIfNonZero(int16x8_m128i a) { - return MaskIfNotEqual(a, int16x8_m128i(_mm_set1_epi16(0))); -} - -template <> -inline __m128i MaskIfGreaterThan(__m128i a, __m128i b) { - return _mm_cmpgt_epi32(a, b); -} - -template <> -inline int16x8_m128i MaskIfGreaterThan(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_cmpgt_epi16(a.v, b.v)); -} - -template <> -inline __m128i MaskIfLessThan(__m128i a, __m128i b) { - return _mm_cmplt_epi32(a, b); -} - -template <> -inline int16x8_m128i MaskIfLessThan(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_cmplt_epi16(a.v, b.v)); -} - -template <> -inline __m128i MaskIfGreaterThanOrEqual(__m128i a, __m128i b) { - return BitNot(MaskIfLessThan(a, b)); -} - -template <> -inline int16x8_m128i MaskIfGreaterThanOrEqual(int16x8_m128i a, - int16x8_m128i b) { - return BitNot(MaskIfLessThan(a, b)); -} - -template <> -inline __m128i MaskIfLessThanOrEqual(__m128i a, __m128i b) { - return BitNot(MaskIfGreaterThan(a, b)); -} - -template <> -inline int16x8_m128i MaskIfLessThanOrEqual(int16x8_m128i a, 
int16x8_m128i b) { - return BitNot(MaskIfGreaterThan(a, b)); -} - -/* Assumptions: - - All and Any are used on masks. - - masks are all_ones for true lanes, all_zeroes otherwise. -Hence, All means all 128bits set, and Any means any bit set. -*/ - -template <> -inline bool All(__m128i a) { - return _mm_testc_si128(a, a); -} - -template <> -inline bool All(int16x8_m128i a) { - return _mm_testc_si128(a.v, a.v); -} - -template <> -inline bool Any(__m128i a) { - return !_mm_testz_si128(a, a); -} - -template <> -inline bool Any(int16x8_m128i a) { - return !_mm_testz_si128(a.v, a.v); -} - -template <> -inline __m128i RoundingHalfSum(__m128i a, __m128i b) { - /* __m128i round_bit_mask, a_over_2, b_over_2, round_bit, sum; */ - /* We divide the inputs before the add to avoid the overflow and costly test - */ - /* of checking if an overflow occured on signed add */ - /* round_bit_mask = _mm_set1_epi32(1); */ - /* a_over_2 = _mm_srai_epi32(a, 1); */ - /* b_over_2 = _mm_srai_epi32(b, 1); */ - /* sum = Add(a_over_2, b_over_2); */ - /* round_bit = _mm_sign_epi32(BitAnd(BitOr(a,b), round_bit_mask), sum); */ - /* return Add(sum, round_bit); */ - - /* Other possibility detecting overflow and xor the sign if an overflow - * happened*/ - __m128i one, sign_bit_mask, sum, rounded_half_sum, overflow, result; - one = _mm_set1_epi32(1); - sign_bit_mask = _mm_set1_epi32(0x80000000); - sum = Add(a, b); - rounded_half_sum = _mm_srai_epi32(Add(sum, one), 1); - overflow = - BitAnd(BitAnd(BitXor(a, rounded_half_sum), BitXor(b, rounded_half_sum)), - sign_bit_mask); - result = BitXor(rounded_half_sum, overflow); - return result; -} - -template <> -inline int16x8_m128i RoundingHalfSum(int16x8_m128i a, int16x8_m128i b) { - // Idea: go to unsigned to use _mm_avg_epu16, - // borrowed from Intel's arm_neon_sse.h header. 
- __m128i constant_neg_32768 = _mm_set1_epi16(-32768); - __m128i a_unsigned = _mm_sub_epi16(a.v, constant_neg_32768); - __m128i b_unsigned = _mm_sub_epi16(b.v, constant_neg_32768); - __m128i avg_unsigned = _mm_avg_epu16(a_unsigned, b_unsigned); - __m128i avg = _mm_add_epi16(avg_unsigned, constant_neg_32768); - return int16x8_m128i(avg); -} - -template <> -inline __m128i SaturatingRoundingDoublingHighMul(__m128i a, __m128i b) { - __m128i min, saturation_mask, a0_a2, a1_a3, b0_b2, b1_b3; - __m128i a0b0_a2b2, a1b1_a3b3, a0b0_a2b2_rounded, a1b1_a3b3_rounded; - __m128i a0b0_a2b2_rounded_2x, a1b1_a3b3_rounded_2x, result; - __m128i nudge; - - // saturation only happen if a == b == INT_MIN - min = _mm_set1_epi32(std::numeric_limits::min()); - saturation_mask = BitAnd(MaskIfEqual(a, b), MaskIfEqual(a, min)); - - // a = a0 | a1 | a2 | a3 - // b = b0 | b1 | b2 | b3 - a0_a2 = a; - a1_a3 = _mm_srli_si128(a, 4); - b0_b2 = b; - b1_b3 = _mm_srli_si128(b, 4); - - a0b0_a2b2 = _mm_mul_epi32(a0_a2, b0_b2); - a1b1_a3b3 = _mm_mul_epi32(a1_a3, b1_b3); - - // do the rounding and take into account that it will be doubled - nudge = _mm_set1_epi64x(1 << 30); - a0b0_a2b2_rounded = _mm_add_epi64(a0b0_a2b2, nudge); - a1b1_a3b3_rounded = _mm_add_epi64(a1b1_a3b3, nudge); - - // do the doubling - a0b0_a2b2_rounded_2x = _mm_slli_epi64(a0b0_a2b2_rounded, 1); - a1b1_a3b3_rounded_2x = _mm_slli_epi64(a1b1_a3b3_rounded, 1); - - // get the high part of the products - result = _mm_blend_epi16(_mm_srli_si128(a0b0_a2b2_rounded_2x, 4), - a1b1_a3b3_rounded_2x, 0xcc); - - // saturate those which overflowed - return SelectUsingMask(saturation_mask, min, result); -} - -template <> -inline int16x8_m128i SaturatingRoundingDoublingHighMul(int16x8_m128i a, - int16x8_m128i b) { - // Idea: use _mm_mulhrs_epi16 then saturate with a bit-operation, - // borrowed from Intel's arm_neon_sse.h header. 
- __m128i result_unsaturated = _mm_mulhrs_epi16(a.v, b.v); - __m128i saturation_mask = - _mm_cmpeq_epi16(result_unsaturated, _mm_set1_epi16(0x8000)); - __m128i result = _mm_xor_si128(result_unsaturated, saturation_mask); - return int16x8_m128i(result); -} - -template <> -inline __m128i Dup<__m128i>(std::int32_t x) { - return _mm_set1_epi32(x); -} - -template <> -inline int16x8_m128i Dup(std::int16_t x) { - return int16x8_m128i(_mm_set1_epi16(x)); -} - -// So far this is only needed for int16. -template <> -inline int16x8_m128i SaturatingAdd(int16x8_m128i a, int16x8_m128i b) { - return int16x8_m128i(_mm_adds_epi16(a.v, b.v)); -} - -} // end namespace gemmlowp - -#endif // GEMMLOWP_INTERNAL_FIXEDPOINT_SSE_H_ diff --git a/code/components/tflite-lib/third_party/gemmlowp/internal/detect_platform.h b/code/components/tflite-lib/third_party/gemmlowp/internal/detect_platform.h deleted file mode 100644 index 6f06d19f..00000000 --- a/code/components/tflite-lib/third_party/gemmlowp/internal/detect_platform.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 The Gemmlowp Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// detect_platform.h: Sets up macros that control architecture-specific -// features of gemmlowp's implementation. - -#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ -#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ - -// Our inline assembly path assume GCC/Clang syntax. 
-// Native Client doesn't seem to support inline assembly(?). -#if defined(__GNUC__) && !defined(__native_client__) -#define GEMMLOWP_ALLOW_INLINE_ASM -#endif - -// Define macro statement that avoids inlining for GCC. -// For non-GCC, define as empty macro. -#if defined(__GNUC__) -#define GEMMLOWP_NOINLINE __attribute__((noinline)) -#else -#define GEMMLOWP_NOINLINE -#endif - -// Detect ARM, 32-bit or 64-bit -#ifdef __arm__ -#define GEMMLOWP_ARM_32 -#endif - -#ifdef __aarch64__ -#define GEMMLOWP_ARM_64 -#endif - -#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64) -#define GEMMLOWP_ARM -#endif - -// Detect MIPS, 32-bit or 64-bit -#if defined(__mips) && !defined(__LP64__) -#define GEMMLOWP_MIPS_32 -#endif - -#if defined(__mips) && defined(__LP64__) -#define GEMMLOWP_MIPS_64 -#endif - -#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64) -#define GEMMLOWP_MIPS -#endif - -// Detect x86, 32-bit or 64-bit -#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386) -#define GEMMLOWP_X86_32 -#endif - -#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64) -#define GEMMLOWP_X86_64 -#endif - -#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64) -#define GEMMLOWP_X86 -#endif - -// Some of our optimized paths use inline assembly and for -// now we don't bother enabling some other optimized paths using intrinddics -// where we can't use inline assembly paths. -#ifdef GEMMLOWP_ALLOW_INLINE_ASM - -// Detect NEON. It's important to check for both tokens. -#if (defined __ARM_NEON) || (defined __ARM_NEON__) -#define GEMMLOWP_NEON -#endif - -// Convenience NEON tokens for 32-bit or 64-bit -#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32) -#define GEMMLOWP_NEON_32 -#endif - -#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64) -#define GEMMLOWP_NEON_64 -#endif - -// Detect MIPS MSA. -// Limit MSA optimizations to little-endian CPUs for now. -// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs? 
-#if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \ - defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define GEMMLOWP_MSA -#endif - -// Convenience MIPS MSA tokens for 32-bit or 64-bit. -#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32) -#define GEMMLOWP_MSA_32 -#endif - -#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64) -#define GEMMLOWP_MSA_64 -#endif - -// compiler define for AVX2 -D GEMMLOWP_ENABLE_AVX2 -// Detect AVX2 -#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2) -#define GEMMLOWP_AVX2 -// Detect SSE4. -// MSVC does not have __SSE4_1__ macro, but will enable SSE4 -// when AVX is turned on. -#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__)) -#define GEMMLOWP_SSE4 -// Detect SSE3. -#elif defined(__SSE3__) -#define GEMMLOWP_SSE3 -#endif - -// Convenience SSE4 tokens for 32-bit or 64-bit -#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \ - !defined(GEMMLOWP_DISABLE_SSE4) -#define GEMMLOWP_SSE4_32 -#endif - -#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32) -#define GEMMLOWP_SSE3_32 -#endif - -#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \ - !defined(GEMMLOWP_DISABLE_SSE4) -#define GEMMLOWP_SSE4_64 -#endif - -#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64) -#define GEMMLOWP_SSE3_64 -#endif - -#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64) -#define GEMMLOWP_AVX2_64 -#endif - -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#include -#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison -#elif __has_feature(address_sanitizer) -#include -#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region -#endif -#endif - -#endif // GEMMLOWP_ALLOW_INLINE_ASM - -// Detect Android. Don't conflate with ARM - we care about tuning -// for non-ARM Android devices too. This can be used in conjunction -// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs. 
-#if defined(__ANDROID__) || defined(ANDROID) -#define GEMMLOWP_ANDROID -#endif - -#endif // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ diff --git a/code/components/tflite-lib/third_party/kissfft/COPYING b/code/components/tflite-lib/third_party/kissfft/COPYING deleted file mode 100644 index 2fc6685a..00000000 --- a/code/components/tflite-lib/third_party/kissfft/COPYING +++ /dev/null @@ -1,11 +0,0 @@ -Copyright (c) 2003-2010 Mark Borgerding - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/code/components/tflite-lib/third_party/kissfft/_kiss_fft_guts.h b/code/components/tflite-lib/third_party/kissfft/_kiss_fft_guts.h deleted file mode 100644 index 1a0f4c26..00000000 --- a/code/components/tflite-lib/third_party/kissfft/_kiss_fft_guts.h +++ /dev/null @@ -1,168 +0,0 @@ -#ifndef _KISS_FFT_GUTS_H -#define _KISS_FFT_GUTS_H - -/* -Copyright (c) 2003-2010, Mark Borgerding - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -/* kiss_fft.h - defines kiss_fft_scalar as either short or a float type - and defines - typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ -#include "kiss_fft.h" -#include - -#define MAXFACTORS 32 -/* e.g. an fft of length 128 has 4 factors - as far as kissfft is concerned - 4*4*4*2 - */ - -struct kiss_fft_state{ - int nfft; - int inverse; - int factors[2*MAXFACTORS]; - kiss_fft_cpx twiddles[1]; -}; - -/* - Explanation of macros dealing with complex math: - - C_MUL(m,a,b) : m = a*b - C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise - C_SUB( res, a,b) : res = a - b - C_SUBFROM( res , a) : res -= a - C_ADDTO( res , a) : res += a - * */ -#ifdef FIXED_POINT -#if (FIXED_POINT==32) -# define FRACBITS 31 -# define SAMPPROD int64_t -#define SAMP_MAX 2147483647 -#else -# define FRACBITS 15 -# define SAMPPROD int32_t -#define SAMP_MAX 32767 -#endif - -#define SAMP_MIN -SAMP_MAX - -#if defined(CHECK_OVERFLOW) -# define CHECK_OVERFLOW_OP(a,op,b) \ - if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \ - fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) ); } -#endif - - -# define smul(a,b) ( (SAMPPROD)(a)*(b) ) -# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS ) - -# define S_MUL(a,b) sround( smul(a,b) ) - -# define C_MUL(m,a,b) \ - do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ - (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) - -# define DIVSCALAR(x,k) \ - (x) = sround( smul( x, SAMP_MAX/k ) ) - -# define C_FIXDIV(c,div) \ - do { DIVSCALAR( (c).r , div); \ - DIVSCALAR( (c).i , div); }while (0) - -# define C_MULBYSCALAR( c, s ) \ - do{ (c).r = sround( smul( (c).r , s ) ) ;\ - (c).i = sround( smul( (c).i , s ) ) ; }while(0) - -#else /* not FIXED_POINT*/ - -# define S_MUL(a,b) ( (a)*(b) ) -#define C_MUL(m,a,b) \ - do{ (m).r = (a).r*(b).r - 
(a).i*(b).i;\ - (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) -# define C_FIXDIV(c,div) /* NOOP */ -# define C_MULBYSCALAR( c, s ) \ - do{ (c).r *= (s);\ - (c).i *= (s); }while(0) -#endif - -#ifndef CHECK_OVERFLOW_OP -# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ -#endif - -#define C_ADD( res, a,b)\ - do { \ - CHECK_OVERFLOW_OP((a).r,+,(b).r)\ - CHECK_OVERFLOW_OP((a).i,+,(b).i)\ - (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ - }while(0) -#define C_SUB( res, a,b)\ - do { \ - CHECK_OVERFLOW_OP((a).r,-,(b).r)\ - CHECK_OVERFLOW_OP((a).i,-,(b).i)\ - (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ - }while(0) -#define C_ADDTO( res , a)\ - do { \ - CHECK_OVERFLOW_OP((res).r,+,(a).r)\ - CHECK_OVERFLOW_OP((res).i,+,(a).i)\ - (res).r += (a).r; (res).i += (a).i;\ - }while(0) - -#define C_SUBFROM( res , a)\ - do {\ - CHECK_OVERFLOW_OP((res).r,-,(a).r)\ - CHECK_OVERFLOW_OP((res).i,-,(a).i)\ - (res).r -= (a).r; (res).i -= (a).i; \ - }while(0) - - -#ifdef FIXED_POINT -# define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase)) -# define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase)) -# define HALF_OF(x) ((x)>>1) -#elif defined(USE_SIMD) -# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) -# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) -# define HALF_OF(x) ((x)*_mm_set1_ps(.5)) -#else -# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) -# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) -# define HALF_OF(x) ((x)*(kiss_fft_scalar).5) -#endif - -#define kf_cexp(x,phase) \ - do{ \ - (x)->r = KISS_FFT_COS(phase);\ - (x)->i = KISS_FFT_SIN(phase);\ - }while(0) - - -/* a debugging function */ -#define pcpx(c)\ - fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) ) - - -#ifdef KISS_FFT_USE_ALLOCA -// define this to allow use of alloca instead of malloc for temporary buffers -// Temporary buffers are used in two case: -// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5 -// 2. "in-place" FFTs. 
Notice the quotes, since kissfft does not really do an in-place transform. -#include -#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes) -#define KISS_FFT_TMP_FREE(ptr) -#else -#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes) -#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr) -#endif -#endif // _KISS_FFT_GUTS_H diff --git a/code/components/tflite-lib/third_party/kissfft/kiss_fft.c b/code/components/tflite-lib/third_party/kissfft/kiss_fft.c deleted file mode 100644 index 9133a013..00000000 --- a/code/components/tflite-lib/third_party/kissfft/kiss_fft.c +++ /dev/null @@ -1,408 +0,0 @@ -/* -Copyright (c) 2003-2010, Mark Borgerding - -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -#include "_kiss_fft_guts.h" -/* The guts header contains all the multiplication and addition macros that are defined for - fixed or floating point complex numbers. It also delares the kf_ internal functions. - */ - -static void kf_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - int m - ) -{ - kiss_fft_cpx * Fout2; - kiss_fft_cpx * tw1 = st->twiddles; - kiss_fft_cpx t; - Fout2 = Fout + m; - do{ - C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); - - C_MUL (t, *Fout2 , *tw1); - tw1 += fstride; - C_SUB( *Fout2 , *Fout , t ); - C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; - }while (--m); -} - -static void kf_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - const size_t m - ) -{ - kiss_fft_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - size_t k=m; - const size_t m2=2*m; - const size_t m3=3*m; - - - tw3 = tw2 = tw1 = st->twiddles; - - do { - C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); - - C_MUL(scratch[0],Fout[m] , *tw1 ); - C_MUL(scratch[1],Fout[m2] , *tw2 ); - C_MUL(scratch[2],Fout[m3] , *tw3 ); - - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - if(st->inverse) { - Fout[m].r = scratch[5].r - scratch[4].i; - 
Fout[m].i = scratch[5].i + scratch[4].r; - Fout[m3].r = scratch[5].r + scratch[4].i; - Fout[m3].i = scratch[5].i - scratch[4].r; - }else{ - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - } - ++Fout; - }while(--k); -} - -static void kf_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - size_t m - ) -{ - size_t k=m; - const size_t m2 = 2*m; - kiss_fft_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_fft_cpx epi3; - epi3 = st->twiddles[fstride*m]; - - tw1=tw2=st->twiddles; - - do{ - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); - - C_MUL(scratch[1],Fout[m] , *tw1); - C_MUL(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - - ++Fout; - }while(--k); -} - -static void kf_bfly5( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - int m - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - int u; - kiss_fft_cpx scratch[13]; - kiss_fft_cpx * twiddles = st->twiddles; - kiss_fft_cpx *tw; - kiss_fft_cpx ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=st->twiddles; - for ( u=0; ur += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = 
S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); - scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); - scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } -} - -/* perform the butterfly for one stage of a mixed radix FFT */ -static void kf_bfly_generic( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - int m, - int p - ) -{ - int u,k,q1,q; - kiss_fft_cpx * twiddles = st->twiddles; - kiss_fft_cpx t; - int Norig = st->nfft; - - kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); - - for ( u=0; u=Norig) twidx-=Norig; - C_MUL(t,scratch[q] , twiddles[twidx] ); - C_ADDTO( Fout[ k ] ,t); - } - k += m; - } - } - KISS_FFT_TMP_FREE(scratch); -} - -static -void kf_work( - kiss_fft_cpx * Fout, - const kiss_fft_cpx * f, - const size_t fstride, - int in_stride, - int * factors, - const kiss_fft_cfg st - ) -{ - kiss_fft_cpx * Fout_beg=Fout; - const int p=*factors++; /* the radix */ - const int m=*factors++; /* stage's fft length/p */ - const kiss_fft_cpx * Fout_end = Fout + p*m; - -#ifdef _OPENMP - // use openmp extensions at the - // top-level (not recursive) - if (fstride==1 && p<=5) - { - int k; - - // execute the p different work units in different threads -# pragma omp parallel for - for (k=0;k floor_sqrt) - p = n; /* no more factors, skip to end */ - } - n /= p; - *facbuf++ = p; - *facbuf++ = n; - } while (n > 1); -} - -/* - * - * User-callable function to allocate all necessary storage space for the fft. 
- * - * The return value is a contiguous block of memory, allocated with malloc. As such, - * It can be freed with free(), rather than a kiss_fft-specific function. - * */ -kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) -{ - kiss_fft_cfg st=NULL; - size_t memneeded = sizeof(struct kiss_fft_state) - + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/ - - if ( lenmem==NULL ) { - st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); - }else{ - if (mem != NULL && *lenmem >= memneeded) - st = (kiss_fft_cfg)mem; - *lenmem = memneeded; - } - if (st) { - int i; - st->nfft=nfft; - st->inverse = inverse_fft; - - for (i=0;iinverse) - phase *= -1; - kf_cexp(st->twiddles+i, phase ); - } - - kf_factor(nfft,st->factors); - } - return st; -} - - -void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) -{ - if (fin == fout) { - //NOTE: this is not really an in-place FFT algorithm. - //It just performs an out-of-place FFT into a temp buffer - kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft); - kf_work(tmpbuf,fin,1,in_stride, st->factors,st); - /* memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); */ - KISS_FFT_TMP_FREE(tmpbuf); - }else{ - kf_work( fout, fin, 1,in_stride, st->factors,st ); - } -} - -void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) -{ - kiss_fft_stride(cfg,fin,fout,1); -} - - -void kiss_fft_cleanup(void) -{ - // nothing needed any more -} - -int kiss_fft_next_fast_size(int n) -{ - while(1) { - int m=n; - while ( (m%2) == 0 ) m/=2; - while ( (m%3) == 0 ) m/=3; - while ( (m%5) == 0 ) m/=5; - if (m<=1) - break; /* n is completely factorable by twos, threes, and fives */ - n++; - } - return n; -} diff --git a/code/components/tflite-lib/third_party/kissfft/kiss_fft.h b/code/components/tflite-lib/third_party/kissfft/kiss_fft.h deleted file mode 100644 index 24e4d0c0..00000000 --- 
a/code/components/tflite-lib/third_party/kissfft/kiss_fft.h +++ /dev/null @@ -1,124 +0,0 @@ -#ifndef KISS_FFT_H -#define KISS_FFT_H - -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C++" { -#endif - -/* - ATTENTION! - If you would like a : - -- a utility that will handle the caching of fft objects - -- real-only (no imaginary time component ) FFT - -- a multi-dimensional FFT - -- a command-line utility to perform ffts - -- a command-line utility to perform fast-convolution filtering - - Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c - in the tools/ directory. -*/ - -#ifdef USE_SIMD -# include -# define kiss_fft_scalar __m128 -#define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16) -#define KISS_FFT_FREE _mm_free -#else -#define KISS_FFT_MALLOC(X) (void*)(0x0) /* Patched. */ -#define KISS_FFT_FREE(X) /* Patched. */ -#endif - - -#ifdef FIXED_POINT -#include /* Patched. */ -# if (FIXED_POINT == 32) -# define kiss_fft_scalar int32_t -# else -# define kiss_fft_scalar int16_t -# endif -#else -# ifndef kiss_fft_scalar -/* default is float */ -# define kiss_fft_scalar float -# endif -#endif - -typedef struct { - kiss_fft_scalar r; - kiss_fft_scalar i; -}kiss_fft_cpx; - -typedef struct kiss_fft_state* kiss_fft_cfg; - -/* - * kiss_fft_alloc - * - * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. - * - * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); - * - * The return value from fft_alloc is a cfg buffer used internally - * by the fft routine or NULL. - * - * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. - * The returned value should be free()d when done to avoid memory leaks. - * - * The state can be placed in a user supplied buffer 'mem': - * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, - * then the function places the cfg in mem and the size used in *lenmem - * and returns mem. 
- * - * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), - * then the function returns NULL and places the minimum cfg - * buffer size in *lenmem. - * */ - -kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); - -/* - * kiss_fft(cfg,in_out_buf) - * - * Perform an FFT on a complex input buffer. - * for a forward FFT, - * fin should be f[0] , f[1] , ... ,f[nfft-1] - * fout will be F[0] , F[1] , ... ,F[nfft-1] - * Note that each element is complex and can be accessed like - f[k].r and f[k].i - * */ -void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); - -/* - A more generic version of the above function. It reads its input from every Nth sample. - * */ -void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); - -/* If kiss_fft_alloc allocated a buffer, it is one contiguous - buffer and can be simply free()d when no longer needed*/ -#define kiss_fft_free free - -/* - Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up - your compiler output to call this before you exit. -*/ -void kiss_fft_cleanup(void); - - -/* - * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) - */ -int kiss_fft_next_fast_size(int n); - -/* for real ffts, we need an even size */ -#define kiss_fftr_next_fast_size_real(n) \ - (kiss_fft_next_fast_size( ((n)+1)>>1)<<1) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/code/components/tflite-lib/third_party/kissfft/tools/kiss_fftr.c b/code/components/tflite-lib/third_party/kissfft/tools/kiss_fftr.c deleted file mode 100644 index 0d22a04d..00000000 --- a/code/components/tflite-lib/third_party/kissfft/tools/kiss_fftr.c +++ /dev/null @@ -1,159 +0,0 @@ -/* -Copyright (c) 2003-2004, Mark Borgerding - -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include "kiss_fftr.h" -#include "_kiss_fft_guts.h" - -struct kiss_fftr_state{ - kiss_fft_cfg substate; - kiss_fft_cpx * tmpbuf; - kiss_fft_cpx * super_twiddles; -#ifdef USE_SIMD - void * pad; -#endif -}; - -kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) -{ - int i; - kiss_fftr_cfg st = NULL; - size_t subsize, memneeded; - - if (nfft & 1) { - /* fprintf(stderr,"Real FFT optimization must be even.\n"); */ - return NULL; - } - nfft >>= 1; - - kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); - memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2); - - if (lenmem == NULL) { - st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded); - } else { - if (*lenmem >= memneeded) - st = (kiss_fftr_cfg) mem; - *lenmem = memneeded; - } - if (!st) - return NULL; - - st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */ - st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); - st->super_twiddles = st->tmpbuf + nfft; - kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); - - for (i = 0; i < nfft/2; ++i) { - double phase = - -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5); - if (inverse_fft) - phase *= -1; - kf_cexp (st->super_twiddles+i,phase); - } - return st; -} - -void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) -{ - /* input buffer timedata is stored row-wise */ - int k,ncfft; - kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; - - if ( st->substate->inverse) { - /* fprintf(stderr,"kiss fft usage error: improper alloc\n"); */ - return; /* exit(1); */ - } - - ncfft = st->substate->nfft; - - /*perform the parallel fft of two real signals packed in real,imag*/ - kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); - /* The real part of the DC element of the frequency spectrum in st->tmpbuf - * contains the sum of the even-numbered elements of the input time sequence - * The imag part is the sum of the 
odd-numbered elements - * - * The sum of tdc.r and tdc.i is the sum of the input time sequence. - * yielding DC of input time sequence - * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... - * yielding Nyquist bin of input time sequence - */ - - tdc.r = st->tmpbuf[0].r; - tdc.i = st->tmpbuf[0].i; - C_FIXDIV(tdc,2); - CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); - CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); - freqdata[0].r = tdc.r + tdc.i; - freqdata[ncfft].r = tdc.r - tdc.i; -#ifdef USE_SIMD - freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); -#else - freqdata[ncfft].i = freqdata[0].i = 0; -#endif - - for ( k=1;k <= ncfft/2 ; ++k ) { - fpk = st->tmpbuf[k]; - fpnk.r = st->tmpbuf[ncfft-k].r; - fpnk.i = - st->tmpbuf[ncfft-k].i; - C_FIXDIV(fpk,2); - C_FIXDIV(fpnk,2); - - C_ADD( f1k, fpk , fpnk ); - C_SUB( f2k, fpk , fpnk ); - C_MUL( tw , f2k , st->super_twiddles[k-1]); - - freqdata[k].r = HALF_OF(f1k.r + tw.r); - freqdata[k].i = HALF_OF(f1k.i + tw.i); - freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); - freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); - } -} - -void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) -{ - /* input buffer timedata is stored row-wise */ - int k, ncfft; - - if (st->substate->inverse == 0) { - /* fprintf (stderr, "kiss fft usage error: improper alloc\n"); */ - return; /* exit (1); */ - } - - ncfft = st->substate->nfft; - - st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r; - st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r; - C_FIXDIV(st->tmpbuf[0],2); - - for (k = 1; k <= ncfft / 2; ++k) { - kiss_fft_cpx fk, fnkc, fek, fok, tmp; - fk = freqdata[k]; - fnkc.r = freqdata[ncfft - k].r; - fnkc.i = -freqdata[ncfft - k].i; - C_FIXDIV( fk , 2 ); - C_FIXDIV( fnkc , 2 ); - - C_ADD (fek, fk, fnkc); - C_SUB (tmp, fk, fnkc); - C_MUL (fok, tmp, st->super_twiddles[k-1]); - C_ADD (st->tmpbuf[k], fek, fok); - C_SUB (st->tmpbuf[ncfft - k], fek, fok); -#ifdef USE_SIMD - st->tmpbuf[ncfft - k].i *= 
_mm_set1_ps(-1.0); -#else - st->tmpbuf[ncfft - k].i *= -1; -#endif - } - kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); -} diff --git a/code/components/tflite-lib/third_party/kissfft/tools/kiss_fftr.h b/code/components/tflite-lib/third_party/kissfft/tools/kiss_fftr.h deleted file mode 100644 index b888a28b..00000000 --- a/code/components/tflite-lib/third_party/kissfft/tools/kiss_fftr.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef KISS_FTR_H -#define KISS_FTR_H - -#include "kiss_fft.h" -#ifdef __cplusplus -extern "C++" { -#endif - - -/* - - Real optimized version can save about 45% cpu time vs. complex fft of a real seq. - - - - */ - -typedef struct kiss_fftr_state *kiss_fftr_cfg; - - -kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); -/* - nfft must be even - - If you don't care to allocate space, use mem = lenmem = NULL -*/ - - -void kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); -/* - input timedata has nfft scalar points - output freqdata has nfft/2+1 complex points -*/ - -void kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); -/* - input freqdata has nfft/2+1 complex points - output timedata has nfft scalar points -*/ - -#define kiss_fftr_free free - -#ifdef __cplusplus -} -#endif -#endif diff --git a/code/components/tflite-lib/third_party/ruy/ruy/profiler/instrumentation.h b/code/components/tflite-lib/third_party/ruy/ruy/profiler/instrumentation.h deleted file mode 100644 index c4df1e68..00000000 --- a/code/components/tflite-lib/third_party/ruy/ruy/profiler/instrumentation.h +++ /dev/null @@ -1,203 +0,0 @@ -/* Copyright 2020 Google LLC. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef RUY_RUY_PROFILER_INSTRUMENTATION_H_ -#define RUY_RUY_PROFILER_INSTRUMENTATION_H_ - -#ifdef RUY_PROFILER -#include -#include -#include -#endif - -namespace ruy { -namespace profiler { - -#ifdef RUY_PROFILER - -// A label is how a code scope is annotated to appear in profiles. -// The stacks that are sampled by the profiler are stacks of such labels. -// A label consists of a literal string, plus optional integer arguments. -class Label { - public: - Label() {} - template - explicit Label(Args... args) { - Set(args...); - } - void Set(const char* format) { - format_ = format; - args_count_ = 0; - } - template - void Set(const char* format, Args... args) { - format_ = format; - args_count_ = sizeof...(args); - SetArgs(0, args...); - } - - void operator=(const Label& other); - - bool operator==(const Label& other) const; - - std::string Formatted() const; - const char* format() const { return format_; } - - private: - void SetArgs(int position, int arg0) { args_[position] = arg0; } - - template - void SetArgs(int position, int arg0, Args... args) { - SetArgs(position, arg0); - SetArgs(position + 1, args...); - } - - static constexpr int kMaxArgs = 4; - const char* format_ = nullptr; - int args_count_ = 0; - int args_[kMaxArgs]; -}; - -namespace detail { - -// Forward-declaration, see class ThreadStack below. 
-class ThreadStack; - -bool& GlobalIsProfilerRunning(); - -// Returns the global vector of pointers to all stacks, there being one stack -// per thread executing instrumented code. -std::vector* GlobalAllThreadStacks(); - -// Returns the mutex to be locked around any access to GlobalAllThreadStacks(). -std::mutex* GlobalsMutex(); - -// Returns the thread-local stack, specific to the current thread. -ThreadStack* ThreadLocalThreadStack(); - -// This 'stack' is what may be more appropriately called a 'pseudostack': -// It contains Label entries that are 'manually' entered by instrumentation -// code. It's unrelated to real call stacks. -struct Stack { - std::uint32_t id = 0; - static constexpr int kMaxSize = 64; - int size = 0; - Label labels[kMaxSize]; -}; - -// Returns the buffer byte size required by CopyToSample. -int GetBufferSize(const Stack& stack); - -// Copies this Stack into a byte buffer, called a 'sample'. -void CopyToBuffer(const Stack& stack, char* dst); - -// Populates this Stack from an existing sample buffer, typically -// produced by CopyToSample. -void ReadFromBuffer(const char* src, Stack* stack); - -// ThreadStack is meant to be used as a thread-local singleton, assigning to -// each thread a Stack object holding its pseudo-stack of profile labels, -// plus a mutex allowing to synchronize accesses to this pseudo-stack between -// this thread and a possible profiler thread sampling it. -class ThreadStack { - public: - ThreadStack(); - ~ThreadStack(); - - const Stack& stack() const { return stack_; } - - // Returns the mutex to lock around any access to this stack. Each stack is - // accessed by potentially two threads: the thread that it belongs to - // (which calls Push and Pop) and the profiler thread during profiling - // (which calls CopyToSample). - std::mutex& Mutex() const { return mutex_; } - - // Pushes a new label on the top of this Stack. - template - void Push(Args... 
args) { - // This mutex locking is needed to guard against race conditions as both - // the current thread and the profiler thread may be concurrently accessing - // this stack. In addition to that, this mutex locking also serves the other - // purpose of acting as a barrier (of compiler code reordering, of runtime - // CPU instruction reordering, and of memory access reordering), which - // gives a measure of correctness to this profiler. The downside is some - // latency. As this lock will be uncontended most of the times, the cost - // should be roughly that of an sequentially-consistent atomic access, - // comparable to an access to the level of CPU data cache that is shared - // among all cores, typically 60 cycles on current ARM CPUs, plus side - // effects from barrier instructions. - std::lock_guard lock(mutex_); - // Avoid overrunning the stack, even in 'release' builds. This profiling - // instrumentation code should not ship in release builds anyway, the - // overhead of this check is negligible, and overrunning a stack array would - // be bad. - if (stack_.size >= Stack::kMaxSize) { - abort(); - } - stack_.labels[stack_.size++].Set(args...); - } - - // Pops the top-most label from this Stack. - void Pop() { - // See the comment in Push about this lock. While it would be tempting to - // try to remove this lock and just atomically decrement size_ with a - // store-release, that would not necessarily be a substitute for all of the - // purposes that this lock serves, or if it was done carefully to serve all - // of the same purposes, then that wouldn't be faster than this (mostly - // uncontended) lock. - std::lock_guard lock(mutex_); - stack_.size--; - } - - private: - mutable std::mutex mutex_; - Stack stack_; -}; - -} // namespace detail - -// RAII user-facing way to construct Labels associated with their life scope -// and get them pushed to / popped from the current thread stack. -class ScopeLabel { - public: - template - ScopeLabel(Args... 
args) : thread_stack_(detail::ThreadLocalThreadStack()) { - thread_stack_->Push(args...); - } - - ~ScopeLabel() { thread_stack_->Pop(); } - - private: - detail::ThreadStack* thread_stack_; -}; - -#else // no RUY_PROFILER - -class ScopeLabel { - public: - template - explicit ScopeLabel(Args...) {} - - // This destructor is needed to consistently silence clang's -Wunused-variable - // which seems to trigger semi-randomly. - ~ScopeLabel() {} -}; - -#endif - -} // namespace profiler -} // namespace ruy - -#endif // RUY_RUY_PROFILER_INSTRUMENTATION_H_