Mirror of https://github.com/jomjol/AI-on-the-edge-device.git
Initial Code v0.1.0
code/lib/tfmicro/tensorflow/core/public/version.h: 139 lines (new normal file)
@@ -0,0 +1,139 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
#define TENSORFLOW_CORE_PUBLIC_VERSION_H_

// TensorFlow uses semantic versioning, see http://semver.org/.

// Also update tensorflow/tensorflow.bzl and
// tensorflow/tools/pip_package/setup.py
#define TF_MAJOR_VERSION 2
#define TF_MINOR_VERSION 1
#define TF_PATCH_VERSION 0

// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
// "-beta", "-rc", "-rc.1")
#define TF_VERSION_SUFFIX ""

#define TF_STR_HELPER(x) #x
#define TF_STR(x) TF_STR_HELPER(x)

// e.g. "0.5.0" or "0.6.0-alpha".
#define TF_VERSION_STRING                                            \
  (TF_STR(TF_MAJOR_VERSION) "." TF_STR(TF_MINOR_VERSION) "." TF_STR( \
      TF_PATCH_VERSION) TF_VERSION_SUFFIX)

// GraphDef compatibility versions (the versions field in graph.proto).
//
// Each graph has producer and min_consumer versions, and each
// consumer has its own version and a min_producer. In addition, graphs can
// mark specific consumer versions as bad (to prevent bugs from executing).
// A consumer will execute a graph if the consumer's version is at least the
// graph's min_consumer, the graph's producer version is at least the consumer's
// min_producer, and the consumer version isn't specifically disallowed by the
// graph.
//
// By default, newly created graphs have producer version TF_GRAPH_DEF_VERSION
// min_consumer TF_GRAPH_DEF_MIN_CONSUMER, and no other bad consumer versions.
//
// Version history:
//
// 0. Graphs created before GraphDef versioning
// 1. First real version (2dec2015)
// 2. adjust_contrast only takes float, doesn't perform clamping (11dec2015)
// 3. Remove TileGrad, since it was equivalent to reduce_sum (30dec2015)
// 4. When support for this version is removed, we can safely make AttrValue
//    parsing more strict with respect to empty list values (see
//    111635679, 7jan2016).
// 5. Graphs are wholly-validated during Session::Create() (7jan2016).
// 6. TensorFlow is scalar strict within Google (27jan2016).
// 7. Remove TopK in favor of TopKV2 (5feb2016).
// 8. Replace RandomCrop from C++ with pure Python (5feb2016).
// 9. Deprecate batch_norm_with_global_normalization (16feb2016).
// 10. Deprecate conv3d_backprop_{filter,input} (10jun2016).
// 11. Deprecate {batch}_self_adjoint_eig (3aug2016).
// 12. Graph consumers understand the node_def field of FunctionDef (22aug2016).
// 13. Deprecate multiple batch linear algebra ops (9sep2016).
// 14. Deprecate batch_matrix_* ops. (10sep2016).
// 15. Deprecate batch_fft_* ops. (14sep2016).
// 16. Deprecate tensor_array (v1) ops in favor of v2 (10nov2016).
// 17. Deprecate inv (11nov2016).
// 17. Expose reverse_v2 (10nov2016)
// 18. Add VariableV2 (30nov2016)
// 19. Deprecated ops created by models moved out of core SkipGram, NegTrain.
//     (08dec2016)
// 20. Catch all version 1.0 changes to Python API generation. SplitV is now
//     used for tf.split, ReverseV2 is now used by tf.reverse, ConcatV2 is
//     now used by tf.concat. Graphs use flooring
//     division and mod semantics. TensorArrayV3. (12dec2016)
//     Also considered the version for when it is required for reduction
//     ops' indices to be scalar or vector, and not higher rank.
//     Some earlier graph def versions allowed this.
// 21. Dropped FunctionDef.Node support, switched to node_def introduced
//     in version 12. (11jan2017)
// 22. Placeholder now can specify and enforce scalar and partial
//     shapes, particularly when restoring a graph from GraphDef
//     produced at version 22 or later. (04/10/2016)
// 23. Remove NonMaxSuppression in favor of NonMaxSuppressionV2.
// 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
// 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
// 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
//     whether default-valued attrs have been stripped from the nodes in the
//     GraphDef. (7dec2017)
// 27. Deprecate TensorArray ops v2 in favor of v3 and deprecated io_ops
//     deprecated in favor of V2 ops. (2018/01/23)
// 28. Deprecate MatrixExponential op in favor of Python implementation.
//     (2018/08/21).
//     (2019/02/15). Added `control_ret` field to FunctionDef proto, and
//     `control_output` field to OpDef proto.
// 29. Deprecate StatefulStandardNormal op in favor of StatefulStandardNormalV2.
//     (2019/03/25).
//     (2019/04/17). Added `arg_attr` field to FunctionDefProto.
// 30. (2019/05/09) First date based GraphDef version. GraphDef
//     versions advance by 1 each day after this point.

#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
#define TF_GRAPH_DEF_VERSION 389  // Updated: 2020/5/2

// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
//
// The checkpoint versions have the same semantics as GraphDef versions, but the
// numbering scheme is separate. We have no plans to ever deprecate checkpoint
// versions, but it's good to have this in place in case we ever need to.
//
// Version history:
//
// 0. Checkpoints saved before checkpoint versioning.
// 1. First real version (10feb2015).
#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
#define TF_CHECKPOINT_VERSION 1

/// Version query functions (defined in generated version_info.cc)

// Host compiler version (declared elsewhere to be __VERSION__)
extern const char* tf_compiler_version();
// The git commit designator when tensorflow was built
// If no git repository, this will be "internal".
extern const char* tf_git_version();
// Value of the _GLIBCXX_USE_CXX11_ABI flag, or 0 if it's not set.
extern int tf_cxx11_abi_flag();
// Returns 1 if build is monolithic, or 0 otherwise.
extern int tf_monolithic_build();

#endif  // TENSORFLOW_CORE_PUBLIC_VERSION_H_
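The TF_STR_HELPER/TF_STR pair above stringizes the numeric version macros into the dotted version string. A minimal sketch of how that expands, assuming this header is on the include path (this snippet is illustrative and not part of the committed file):

#include <stdio.h>
#include "tensorflow/core/public/version.h"

int main(void) {
  // TF_STR(TF_MAJOR_VERSION) expands to "2", so concatenation yields "2.1.0".
  printf("TF_VERSION_STRING    = %s\n", TF_VERSION_STRING);
  printf("TF_GRAPH_DEF_VERSION = %d\n", TF_GRAPH_DEF_VERSION);
  return 0;
}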
code/lib/tfmicro/tensorflow/lite/c/builtin_op_data.h: 466 lines (new normal file)
@@ -0,0 +1,466 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
#define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_

#include <stdint.h>

#include "tensorflow/lite/c/common.h"

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
// number of dimensions.
#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8

// TODO(aselle): Consider using "if this then that" for testing.

// Useful placeholder to put in otherwise empty structs to avoid size warnings.
typedef struct {
  char dummy;
} EmptyStructPlaceholder;

// IMPORTANT: All new members of structs must be added at the end to ensure
// backwards compatibility.

// Possible padding types (for convolutions)
typedef enum {
  kTfLitePaddingUnknown = 0,
  kTfLitePaddingSame,
  kTfLitePaddingValid,
} TfLitePadding;

typedef enum {
  kTfLiteMirrorPaddingUnknown = 0,
  kTfLiteMirrorPaddingReflect,
  kTfLiteMirrorPaddingSymmetric,
} TfLiteMirrorPaddingMode;

// TODO(b/130259536): We should move this out of builtin_op_data.
typedef struct {
  int width;
  int height;
  int width_offset;
  int height_offset;
} TfLitePaddingValues;

typedef struct {
  TfLiteMirrorPaddingMode mode;
} TfLiteMirrorPaddingParams;

// Possible fused activation functions.
// TODO(aselle): rename to TfLiteActivation
typedef enum {
  kTfLiteActNone = 0,
  kTfLiteActRelu,
  kTfLiteActRelu1,  // min(max(-1, x), 1)
  kTfLiteActRelu6,  // min(max(0, x), 6)
  kTfLiteActTanh,
  kTfLiteActSignBit,
  kTfLiteActSigmoid,
} TfLiteFusedActivation;

typedef struct {
  // Parameters for CONV_2D version 1.
  TfLitePadding padding;
  int stride_width;
  int stride_height;
  TfLiteFusedActivation activation;

  // Parameters for CONV_2D version 2.
  // Note: Version 2 supports dilation values not equal to 1.
  int dilation_width_factor;
  int dilation_height_factor;
} TfLiteConvParams;

typedef struct {
  TfLitePadding padding;
  int stride_width;
  int stride_height;
  int filter_width;
  int filter_height;
  TfLiteFusedActivation activation;
  struct {
    TfLitePaddingValues padding;
  } computed;
} TfLitePoolParams;

typedef struct {
  // Parameters for DepthwiseConv version 1 or above.
  TfLitePadding padding;
  int stride_width;
  int stride_height;
  // `depth_multiplier` is redundant. It's used by CPU kernels in
  // TensorFlow 2.0 or below, but ignored in versions above.
  //
  // The information can be deduced from the shape of input and the shape of
  // weights. Since the TFLiteConverter toolchain doesn't support partially
  // specified shapes, relying on `depth_multiplier` stops us from supporting
  // graphs with dynamic shape tensors.
  //
  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
  // field.
  int depth_multiplier;
  TfLiteFusedActivation activation;
  // Parameters for DepthwiseConv version 2 or above.
  int dilation_width_factor;
  int dilation_height_factor;
} TfLiteDepthwiseConvParams;

typedef struct {
  int rank;
  TfLiteFusedActivation activation;

  // Parameter for SVDF version 4.
  bool asymmetric_quantize_inputs;
} TfLiteSVDFParams;

typedef struct {
  TfLiteFusedActivation activation;

  // Parameter for RNN version 3.
  bool asymmetric_quantize_inputs;
} TfLiteRNNParams;

typedef struct {
  bool time_major;
  TfLiteFusedActivation activation;

  // Parameter for Sequence RNN version 3.
  bool asymmetric_quantize_inputs;
} TfLiteSequenceRNNParams;

typedef struct {
  bool time_major;
  TfLiteFusedActivation activation;
  bool merge_outputs;

  // Parameter for Bidirectional RNN verison 3.
  bool asymmetric_quantize_inputs;
} TfLiteBidirectionalSequenceRNNParams;

typedef enum {
  kTfLiteFullyConnectedWeightsFormatDefault = 0,
  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
} TfLiteFullyConnectedWeightsFormat;

typedef struct {
  // Parameters for FullyConnected version 1 or above.
  TfLiteFusedActivation activation;

  // Parameters for FullyConnected version 2 or above.
  TfLiteFullyConnectedWeightsFormat weights_format;

  // Parameters for FullyConnected version 5 or above.
  // If set to true, then the number of dimensions in the input and the output
  // tensors are the same. Furthermore, all but the last dimension of the input
  // and output shapes will be equal.
  bool keep_num_dims;

  // Parameters for FullyConnected version 7 or above.
  // If set to true and the weights are quantized, then non constant inputs
  // are quantized at evaluation time with asymmetric quantization.
  bool asymmetric_quantize_inputs;
} TfLiteFullyConnectedParams;

typedef enum {
  kTfLiteLshProjectionUnknown = 0,
  kTfLiteLshProjectionSparse = 1,
  kTfLiteLshProjectionDense = 2,
} TfLiteLSHProjectionType;

typedef struct {
  TfLiteLSHProjectionType type;
} TfLiteLSHProjectionParams;

typedef struct {
  float beta;
} TfLiteSoftmaxParams;

typedef struct {
  int axis;
  TfLiteFusedActivation activation;
} TfLiteConcatenationParams;

typedef struct {
  TfLiteFusedActivation activation;
} TfLiteAddParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteSpaceToBatchNDParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteBatchToSpaceNDParams;

typedef struct {
  bool adj_x;
  bool adj_y;
} TfLiteBatchMatMulParams;

typedef struct {
  TfLiteFusedActivation activation;
} TfLiteMulParams;

typedef struct {
  TfLiteFusedActivation activation;
} TfLiteSubParams;

typedef struct {
  TfLiteFusedActivation activation;
} TfLiteDivParams;

typedef struct {
  TfLiteFusedActivation activation;
} TfLiteL2NormParams;

typedef struct {
  int radius;
  float bias;
  float alpha;
  float beta;
} TfLiteLocalResponseNormParams;

typedef enum {
  kTfLiteLSTMFullKernel = 0,
  kTfLiteLSTMBasicKernel
} TfLiteLSTMKernelType;

typedef struct {
  // Parameters for LSTM version 1.
  TfLiteFusedActivation activation;
  float cell_clip;
  float proj_clip;

  // Parameters for LSTM version 2.
  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
  TfLiteLSTMKernelType kernel_type;

  // Parameters for LSTM version 4.
  bool asymmetric_quantize_inputs;
} TfLiteLSTMParams;

typedef struct {
  // Parameters needed for the underlying LSTM.
  TfLiteFusedActivation activation;
  float cell_clip;
  float proj_clip;

  // If set to true then the first dimension is time, otherwise batch.
  bool time_major;

  // Parameter for unidirectional sequence RNN version 3.
  bool asymmetric_quantize_inputs;
} TfLiteUnidirectionalSequenceLSTMParams;

typedef struct {
  // Parameters supported by version 1:
  // Parameters inherited for the LSTM kernel.
  TfLiteFusedActivation activation;
  float cell_clip;
  float proj_clip;

  // If true, store the outputs of both directions in the first output.
  bool merge_outputs;

  // Parameters supported by version 2:
  // If set to true then the first dimension is time, otherwise batch.
  bool time_major;

  // Parameters supported by version 4:
  // If set to true, then hybrid ops use asymmetric quantization for inputs.
  bool asymmetric_quantize_inputs;
} TfLiteBidirectionalSequenceLSTMParams;

typedef struct {
  bool align_corners;
  // half_pixel_centers assumes pixels are of half the actual dimensions, and
  // yields more accurate resizes. Corresponds to the same argument for the
  // original TensorFlow op in TF2.0.
  bool half_pixel_centers;
} TfLiteResizeBilinearParams;

typedef struct {
  bool align_corners;
} TfLiteResizeNearestNeighborParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLitePadParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLitePadV2Params;

typedef struct {
  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
  // For now we will fix the maximum possible number of dimensions.
  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
  int num_dimensions;
} TfLiteReshapeParams;

typedef struct {
  int ngram_size;
  int max_skip_size;
  bool include_all_ngrams;
} TfLiteSkipGramParams;

typedef struct {
  int block_size;
} TfLiteSpaceToDepthParams;

typedef struct {
  int block_size;
} TfLiteDepthToSpaceParams;

typedef struct {
  TfLiteType in_data_type;
  TfLiteType out_data_type;
} TfLiteCastParams;

typedef enum {
  kTfLiteCombinerTypeSum = 0,
  kTfLiteCombinerTypeMean = 1,
  kTfLiteCombinerTypeSqrtn = 2,
} TfLiteCombinerType;

typedef struct {
  TfLiteCombinerType combiner;
} TfLiteEmbeddingLookupSparseParams;

typedef struct {
  int axis;
} TfLiteGatherParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteTransposeParams;

typedef struct {
  bool keep_dims;
} TfLiteReducerParams;

typedef struct {
  int num_splits;
} TfLiteSplitParams;

typedef struct {
  int num_splits;
} TfLiteSplitVParams;

typedef struct {
  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
  // For now we will fix the maximum possible number of dimensions.
  int squeeze_dims[8];
  int num_squeeze_dims;
} TfLiteSqueezeParams;

typedef struct {
  int begin_mask;
  int end_mask;
  int ellipsis_mask;
  int new_axis_mask;
  int shrink_axis_mask;
} TfLiteStridedSliceParams;

typedef struct {
  TfLiteType output_type;
} TfLiteArgMaxParams;

typedef struct {
  TfLiteType output_type;
} TfLiteArgMinParams;

typedef struct {
  TfLitePadding padding;
  int stride_width;
  int stride_height;
} TfLiteTransposeConvParams;

typedef struct {
  bool validate_indices;
} TfLiteSparseToDenseParams;

typedef struct {
  TfLiteType out_type;
} TfLiteShapeParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteRankParams;

typedef struct {
  // Parameters supported by version 1:
  float min;
  float max;
  int num_bits;

  // Parameters supported by version 2:
  bool narrow_range;
} TfLiteFakeQuantParams;

typedef struct {
  int values_count;
  int axis;
} TfLitePackParams;

typedef struct {
  int axis;
} TfLiteOneHotParams;

typedef struct {
  int num;
  int axis;
} TfLiteUnpackParams;

typedef struct {
  float alpha;
} TfLiteLeakyReluParams;

typedef struct {
  TfLiteType index_out_type;
} TfLiteUniqueParams;

typedef struct {
  int seq_dim;
  int batch_dim;
} TfLiteReverseSequenceParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteMatrixDiagParams;

typedef struct {
  EmptyStructPlaceholder placeholder;
} TfLiteMatrixSetDiagParams;

typedef struct {
  int then_subgraph_index;
  int else_subgraph_index;
} TfLiteIfParams;

typedef struct {
  int cond_subgraph_index;
  int body_subgraph_index;
} TfLiteWhileParams;

#ifdef __cplusplus
}  // extern "C"
#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
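The structs above are plain C aggregates that the interpreter hands to builtin kernels through TfLiteNode's builtin_data pointer. Purely as an illustration (not part of the commit), a CONV_2D parameter block could be populated by hand like this:

#include "tensorflow/lite/c/builtin_op_data.h"

static TfLiteConvParams MakeConvParams(void) {
  TfLiteConvParams p = {0};       // zero-init so newly appended members stay 0
  p.padding = kTfLitePaddingSame;
  p.stride_width = 1;
  p.stride_height = 1;
  p.activation = kTfLiteActRelu6;
  p.dilation_width_factor = 1;    // CONV_2D version 2 fields
  p.dilation_height_factor = 1;
  return p;
}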
code/lib/tfmicro/tensorflow/lite/c/common.c: 228 lines (new normal file)
@@ -0,0 +1,228 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/common.h"
#ifndef TF_LITE_STATIC_MEMORY
#include <stdlib.h>
#include <string.h>
#endif  // TF_LITE_STATIC_MEMORY

int TfLiteIntArrayGetSizeInBytes(int size) {
  static TfLiteIntArray dummy;
  return sizeof(dummy) + sizeof(dummy.data[0]) * size;
}

int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
  if (a == b) return 1;
  if (a == NULL || b == NULL) return 0;
  return TfLiteIntArrayEqualsArray(a, b->size, b->data);
}

int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
                              const int b_data[]) {
  if (a == NULL) return (b_size == 0);
  if (a->size != b_size) return 0;
  int i = 0;
  for (; i < a->size; i++)
    if (a->data[i] != b_data[i]) return 0;
  return 1;
}

#ifndef TF_LITE_STATIC_MEMORY

TfLiteIntArray* TfLiteIntArrayCreate(int size) {
  TfLiteIntArray* ret =
      (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size));
  ret->size = size;
  return ret;
}

TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
  if (!src) return NULL;
  TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
  if (ret) {
    memcpy(ret->data, src->data, src->size * sizeof(int));
  }
  return ret;
}

void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); }

#endif  // TF_LITE_STATIC_MEMORY

int TfLiteFloatArrayGetSizeInBytes(int size) {
  static TfLiteFloatArray dummy;
  return sizeof(dummy) + sizeof(dummy.data[0]) * size;
}

#ifndef TF_LITE_STATIC_MEMORY

TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
  TfLiteFloatArray* ret =
      (TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size));
  ret->size = size;
  return ret;
}

void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }

void TfLiteTensorDataFree(TfLiteTensor* t) {
  if (t->allocation_type == kTfLiteDynamic) {
    free(t->data.raw);
  }
  t->data.raw = NULL;
}

void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
  if (quantization->type == kTfLiteAffineQuantization) {
    TfLiteAffineQuantization* q_params =
        (TfLiteAffineQuantization*)(quantization->params);
    if (q_params->scale) {
      TfLiteFloatArrayFree(q_params->scale);
      q_params->scale = NULL;
    }
    if (q_params->zero_point) {
      TfLiteIntArrayFree(q_params->zero_point);
      q_params->zero_point = NULL;
    }
    free(q_params);
  }
  quantization->params = NULL;
  quantization->type = kTfLiteNoQuantization;
}

void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
  if (sparsity == NULL) {
    return;
  }

  if (sparsity->traversal_order) {
    TfLiteIntArrayFree(sparsity->traversal_order);
    sparsity->traversal_order = NULL;
  }

  if (sparsity->block_map) {
    TfLiteIntArrayFree(sparsity->block_map);
    sparsity->block_map = NULL;
  }

  if (sparsity->dim_metadata) {
    int i = 0;
    for (; i < sparsity->dim_metadata_size; i++) {
      TfLiteDimensionMetadata metadata = sparsity->dim_metadata[i];
      if (metadata.format == kTfLiteDimSparseCSR) {
        TfLiteIntArrayFree(metadata.array_segments);
        metadata.array_segments = NULL;
        TfLiteIntArrayFree(metadata.array_indices);
        metadata.array_indices = NULL;
      }
    }
    free(sparsity->dim_metadata);
    sparsity->dim_metadata = NULL;
  }

  free(sparsity);
}

void TfLiteTensorFree(TfLiteTensor* t) {
  TfLiteTensorDataFree(t);
  if (t->dims) TfLiteIntArrayFree(t->dims);
  t->dims = NULL;

  if (t->dims_signature) {
    TfLiteIntArrayFree((TfLiteIntArray*)t->dims_signature);
  }
  t->dims_signature = NULL;

  TfLiteQuantizationFree(&t->quantization);
  TfLiteSparsityFree(t->sparsity);
  t->sparsity = NULL;
}

void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
                       TfLiteQuantizationParams quantization, char* buffer,
                       size_t size, TfLiteAllocationType allocation_type,
                       const void* allocation, bool is_variable,
                       TfLiteTensor* tensor) {
  TfLiteTensorFree(tensor);
  tensor->type = type;
  tensor->name = name;
  tensor->dims = dims;
  tensor->params = quantization;
  tensor->data.raw = buffer;
  tensor->bytes = size;
  tensor->allocation_type = allocation_type;
  tensor->allocation = allocation;
  tensor->is_variable = is_variable;

  tensor->quantization.type = kTfLiteNoQuantization;
  tensor->quantization.params = NULL;
}

void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
  if (tensor->allocation_type != kTfLiteDynamic) {
    return;
  }
  // TODO(b/145340303): Tensor data should be aligned.
  if (!tensor->data.raw) {
    tensor->data.raw = malloc(num_bytes);
  } else if (num_bytes > tensor->bytes) {
    tensor->data.raw = realloc(tensor->data.raw, num_bytes);
  }
  tensor->bytes = num_bytes;
}
#endif  // TF_LITE_STATIC_MEMORY

const char* TfLiteTypeGetName(TfLiteType type) {
  switch (type) {
    case kTfLiteNoType:
      return "NOTYPE";
    case kTfLiteFloat32:
      return "FLOAT32";
    case kTfLiteInt16:
      return "INT16";
    case kTfLiteInt32:
      return "INT32";
    case kTfLiteUInt8:
      return "UINT8";
    case kTfLiteInt8:
      return "INT8";
    case kTfLiteInt64:
      return "INT64";
    case kTfLiteBool:
      return "BOOL";
    case kTfLiteComplex64:
      return "COMPLEX64";
    case kTfLiteString:
      return "STRING";
    case kTfLiteFloat16:
      return "FLOAT16";
    case kTfLiteFloat64:
      return "FLOAT64";
  }
  return "Unknown type";
}

TfLiteDelegate TfLiteDelegateCreate() {
  TfLiteDelegate d = {
      .data_ = NULL,
      .Prepare = NULL,
      .CopyFromBufferHandle = NULL,
      .CopyToBufferHandle = NULL,
      .FreeBufferHandle = NULL,
      .flags = kTfLiteDelegateFlagsNone,
  };
  return d;
}
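A short usage sketch for the array helpers defined above (illustrative, not part of the commit; it assumes TF_LITE_STATIC_MEMORY is not defined, so the heap-backed variants exist):

#include "tensorflow/lite/c/common.h"

static int ShapeIs2x3(void) {
  TfLiteIntArray* dims = TfLiteIntArrayCreate(2);  // heap allocation, size 2
  dims->data[0] = 2;
  dims->data[1] = 3;
  const int expected[] = {2, 3};
  int equal = TfLiteIntArrayEqualsArray(dims, 2, expected);  // returns 1
  TfLiteIntArrayFree(dims);
  return equal;
}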
code/lib/tfmicro/tensorflow/lite/c/common.h: 767 lines (new normal file)
@@ -0,0 +1,767 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This file defines common C types and APIs for implementing operations,
// delegates and other constructs in TensorFlow Lite. The actual operations and
// delegates can be defined using C++, but the interface between the interpreter
// and the operations are C.
//
// Summary of abstractions
// TF_LITE_ENSURE - Self-sufficient error checking
// TfLiteStatus - Status reporting
// TfLiteIntArray - stores tensor shapes (dims),
// TfLiteContext - allows an op to access the tensors
// TfLiteTensor - tensor (a multidimensional array)
// TfLiteNode - a single node or operation
// TfLiteRegistration - the implementation of a conceptual operation.
// TfLiteDelegate - allows delegation of nodes to alternative backends.
//
// Some abstractions in this file are created and managed by Interpreter.

#ifndef TENSORFLOW_LITE_C_COMMON_H_
#define TENSORFLOW_LITE_C_COMMON_H_

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

typedef enum TfLiteStatus {
  kTfLiteOk = 0,
  kTfLiteError = 1,
  kTfLiteDelegateError = 2
} TfLiteStatus;

// The list of external context types known to TF Lite. This list exists solely
// to avoid conflicts and to ensure ops can share the external contexts they
// need. Access to the external contexts is controlled by one of the
// corresponding support files.
typedef enum TfLiteExternalContextType {
  kTfLiteEigenContext = 0,       // include eigen_support.h to use.
  kTfLiteGemmLowpContext = 1,    // include gemm_support.h to use.
  kTfLiteEdgeTpuContext = 2,     // Placeholder for Edge TPU support.
  kTfLiteCpuBackendContext = 3,  // include cpu_backend_support.h to use.
  kTfLiteMaxExternalContexts = 4
} TfLiteExternalContextType;

// Forward declare so dependent structs and methods can reference these types
// prior to the struct definitions.
struct TfLiteContext;
struct TfLiteDelegate;
struct TfLiteRegistration;

// An external context is a collection of information unrelated to the TF Lite
// framework, but useful to a subset of the ops. TF Lite knows very little
// about about the actual contexts, but it keeps a list of them, and is able to
// refresh them if configurations like the number of recommended threads
// change.
typedef struct TfLiteExternalContext {
  TfLiteExternalContextType type;
  TfLiteStatus (*Refresh)(struct TfLiteContext* context);
} TfLiteExternalContext;

#define kTfLiteOptionalTensor (-1)

// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
// indices
typedef struct TfLiteIntArray {
  int size;
// gcc 6.1+ have a bug where flexible members aren't properly handled
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
    __GNUC_MINOR__ >= 1
  int data[0];
#else
  int data[];
#endif
} TfLiteIntArray;

// Given the size (number of elements) in a TfLiteIntArray, calculate its size
// in bytes.
int TfLiteIntArrayGetSizeInBytes(int size);

#ifndef TF_LITE_STATIC_MEMORY
// Create a array of a given `size` (uninitialized entries).
// This returns a pointer, that you must free using TfLiteIntArrayFree().
TfLiteIntArray* TfLiteIntArrayCreate(int size);
#endif

// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise.
int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b);

// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise.
int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
                              const int b_data[]);

#ifndef TF_LITE_STATIC_MEMORY
// Create a copy of an array passed as `src`.
// You are expected to free memory with TfLiteIntArrayFree
TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src);

// Free memory of array `a`.
void TfLiteIntArrayFree(TfLiteIntArray* a);
#endif  // TF_LITE_STATIC_MEMORY

// Fixed size list of floats. Used for per-channel quantization.
typedef struct TfLiteFloatArray {
  int size;
// gcc 6.1+ have a bug where flexible members aren't properly handled
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
    __GNUC_MINOR__ >= 1
  float data[0];
#else
  float data[];
#endif
} TfLiteFloatArray;

// Given the size (number of elements) in a TfLiteFloatArray, calculate its size
// in bytes.
int TfLiteFloatArrayGetSizeInBytes(int size);

#ifndef TF_LITE_STATIC_MEMORY
// Create a array of a given `size` (uninitialized entries).
// This returns a pointer, that you must free using TfLiteFloatArrayFree().
TfLiteFloatArray* TfLiteFloatArrayCreate(int size);

// Free memory of array `a`.
void TfLiteFloatArrayFree(TfLiteFloatArray* a);
#endif  // TF_LITE_STATIC_MEMORY

// Since we must not depend on any libraries, define a minimal subset of
// error macros while avoiding names that have pre-conceived meanings like
// assert and check.

// Try to make all reporting calls through TF_LITE_KERNEL_LOG rather than
// calling the context->ReportError function directly, so that message strings
// can be stripped out if the binary size needs to be severely optimized.
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_KERNEL_LOG(context, ...)            \
  do {                                              \
    (context)->ReportError((context), __VA_ARGS__); \
  } while (false)

#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)        \
  do {                                                \
    if ((context) != nullptr) {                       \
      (context)->ReportError((context), __VA_ARGS__); \
    }                                                 \
  } while (false)
#else  // TF_LITE_STRIP_ERROR_STRINGS
#define TF_LITE_KERNEL_LOG(context, ...)
#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)
#endif  // TF_LITE_STRIP_ERROR_STRINGS

// Check whether value is true, and if not return kTfLiteError from
// the current function (and report the error string msg).
#define TF_LITE_ENSURE_MSG(context, value, msg)        \
  do {                                                 \
    if (!(value)) {                                    \
      TF_LITE_KERNEL_LOG((context), __FILE__ " " msg); \
      return kTfLiteError;                             \
    }                                                  \
  } while (0)

// Check whether the value `a` is true, and if not return kTfLiteError from
// the current function, while also reporting the location of the error.
#define TF_LITE_ENSURE(context, a)                                      \
  do {                                                                  \
    if (!(a)) {                                                         \
      TF_LITE_KERNEL_LOG((context), "%s:%d %s was not true.", __FILE__, \
                         __LINE__, #a);                                 \
      return kTfLiteError;                                              \
    }                                                                   \
  } while (0)

#define TF_LITE_ENSURE_STATUS(a) \
  do {                           \
    const TfLiteStatus s = (a);  \
    if (s != kTfLiteOk) {        \
      return s;                  \
    }                            \
  } while (0)

// Check whether the value `a == b` is true, and if not return kTfLiteError from
// the current function, while also reporting the location of the error.
// `a` and `b` may be evaluated more than once, so no side effects or
// extremely expensive computations should be done.
#define TF_LITE_ENSURE_EQ(context, a, b)                                   \
  do {                                                                     \
    if ((a) != (b)) {                                                      \
      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%d != %d)", __FILE__, \
                         __LINE__, #a, #b, (a), (b));                      \
      return kTfLiteError;                                                 \
    }                                                                      \
  } while (0)

#define TF_LITE_ENSURE_TYPES_EQ(context, a, b)                             \
  do {                                                                     \
    if ((a) != (b)) {                                                      \
      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%s != %s)", __FILE__, \
                         __LINE__, #a, #b, TfLiteTypeGetName(a),           \
                         TfLiteTypeGetName(b));                            \
      return kTfLiteError;                                                 \
    }                                                                      \
  } while (0)

#define TF_LITE_ENSURE_OK(context, status) \
  do {                                     \
    const TfLiteStatus s = (status);       \
    if ((s) != kTfLiteOk) {                \
      return s;                            \
    }                                      \
  } while (0)

// Single-precision complex data type compatible with the C99 definition.
typedef struct TfLiteComplex64 {
  float re, im;  // real and imaginary parts, respectively.
} TfLiteComplex64;

// Half precision data type compatible with the C99 definition.
typedef struct TfLiteFloat16 {
  uint16_t data;
} TfLiteFloat16;

// Types supported by tensor
typedef enum {
  kTfLiteNoType = 0,
  kTfLiteFloat32 = 1,
  kTfLiteInt32 = 2,
  kTfLiteUInt8 = 3,
  kTfLiteInt64 = 4,
  kTfLiteString = 5,
  kTfLiteBool = 6,
  kTfLiteInt16 = 7,
  kTfLiteComplex64 = 8,
  kTfLiteInt8 = 9,
  kTfLiteFloat16 = 10,
  kTfLiteFloat64 = 11,
} TfLiteType;

// Return the name of a given type, for error reporting purposes.
const char* TfLiteTypeGetName(TfLiteType type);

// SupportedQuantizationTypes.
typedef enum TfLiteQuantizationType {
  // No quantization.
  kTfLiteNoQuantization = 0,
  // Affine quantization (with support for per-channel quantization).
  // Corresponds to TfLiteAffineQuantization.
  kTfLiteAffineQuantization = 1,
} TfLiteQuantizationType;

// Structure specifying the quantization used by the tensor, if-any.
typedef struct TfLiteQuantization {
  // The type of quantization held by params.
  TfLiteQuantizationType type;
  // Holds a reference to one of the quantization param structures specified
  // below.
  void* params;
} TfLiteQuantization;

// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
// If per-layer quantization is specified this field will still be populated in
// addition to TfLiteAffineQuantization.
// Parameters for asymmetric quantization. Quantized values can be converted
// back to float using:
//     real_value = scale * (quantized_value - zero_point)
typedef struct TfLiteQuantizationParams {
  float scale;
  int32_t zero_point;
} TfLiteQuantizationParams;
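
// Illustrative sketch, not part of the committed header: applying the
// conversion documented above, real_value = scale * (quantized_value -
// zero_point), to a single uint8 value.
static inline float TfLiteDequantizeUInt8Sketch(uint8_t quantized_value,
                                                TfLiteQuantizationParams params) {
  return params.scale * ((int32_t)quantized_value - params.zero_point);
}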
|
||||
|
||||
// Parameters for asymmetric quantization across a dimension (i.e per output
|
||||
// channel quantization).
|
||||
// quantized_dimension specifies which dimension the scales and zero_points
|
||||
// correspond to.
|
||||
// For a particular value in quantized_dimension, quantized values can be
|
||||
// converted back to float using:
|
||||
// real_value = scale * (quantized_value - zero_point)
|
||||
typedef struct TfLiteAffineQuantization {
|
||||
TfLiteFloatArray* scale;
|
||||
TfLiteIntArray* zero_point;
|
||||
int32_t quantized_dimension;
|
||||
} TfLiteAffineQuantization;
|
||||
|
||||
/* A union of pointers that points to memory for a given tensor. */
|
||||
typedef union TfLitePtrUnion {
|
||||
/* Do not access these members directly, if possible, use
|
||||
* GetTensorData<TYPE>(tensor) instead, otherwise only access .data, as other
|
||||
* members are deprecated. */
|
||||
int32_t* i32;
|
||||
int64_t* i64;
|
||||
float* f;
|
||||
TfLiteFloat16* f16;
|
||||
char* raw;
|
||||
const char* raw_const;
|
||||
uint8_t* uint8;
|
||||
bool* b;
|
||||
int16_t* i16;
|
||||
TfLiteComplex64* c64;
|
||||
int8_t* int8;
|
||||
/* Only use this member. */
|
||||
void* data;
|
||||
} TfLitePtrUnion;
|
||||
|
||||
// Memory allocation strategies. kTfLiteMmapRo is for read-only memory-mapped
|
||||
// data (or data externally allocated). kTfLiteArenaRw is arena allocated
|
||||
// data. kTfLiteDynamic is for tensors that are allocated during evaluation.
|
||||
typedef enum TfLiteAllocationType {
|
||||
kTfLiteMemNone = 0,
|
||||
kTfLiteMmapRo,
|
||||
kTfLiteArenaRw,
|
||||
kTfLiteArenaRwPersistent,
|
||||
kTfLiteDynamic,
|
||||
} TfLiteAllocationType;
|
||||
|
||||
// The delegates should use zero or positive integers to represent handles.
|
||||
// -1 is reserved from unallocated status.
|
||||
typedef int TfLiteBufferHandle;
|
||||
enum {
|
||||
kTfLiteNullBufferHandle = -1,
|
||||
};
|
||||
|
||||
// Storage format of each dimension in a sparse tensor.
|
||||
typedef enum TfLiteDimensionType {
|
||||
kTfLiteDimDense = 0,
|
||||
kTfLiteDimSparseCSR,
|
||||
} TfLiteDimensionType;
|
||||
|
||||
// Metadata to encode each dimension in a sparse tensor.
|
||||
typedef struct TfLiteDimensionMetadata {
|
||||
TfLiteDimensionType format;
|
||||
int dense_size;
|
||||
TfLiteIntArray* array_segments;
|
||||
TfLiteIntArray* array_indices;
|
||||
} TfLiteDimensionMetadata;
|
||||
|
||||
// Parameters used to encode a sparse tensor. For detailed explanation of each
|
||||
// field please refer to lite/schema/schema.fbs.
|
||||
typedef struct TfLiteSparsity {
|
||||
TfLiteIntArray* traversal_order;
|
||||
TfLiteIntArray* block_map;
|
||||
TfLiteDimensionMetadata* dim_metadata;
|
||||
int dim_metadata_size;
|
||||
} TfLiteSparsity;
|
||||
|
||||
// An tensor in the interpreter system which is a wrapper around a buffer of
|
||||
// data including a dimensionality (or NULL if not currently defined).
|
||||
typedef struct TfLiteTensor {
|
||||
// The data type specification for data stored in `data`. This affects
|
||||
// what member of `data` union should be used.
|
||||
TfLiteType type;
|
||||
// A union of data pointers. The appropriate type should be used for a typed
|
||||
// tensor based on `type`.
|
||||
TfLitePtrUnion data;
|
||||
// A pointer to a structure representing the dimensionality interpretation
|
||||
// that the buffer should have. NOTE: the product of elements of `dims`
|
||||
// and the element datatype size should be equal to `bytes` below.
|
||||
TfLiteIntArray* dims;
|
||||
// Quantization information.
|
||||
TfLiteQuantizationParams params;
|
||||
// How memory is mapped
|
||||
// kTfLiteMmapRo: Memory mapped read only.
|
||||
// i.e. weights
|
||||
// kTfLiteArenaRw: Arena allocated read write memory
|
||||
// (i.e. temporaries, outputs).
|
||||
TfLiteAllocationType allocation_type;
|
||||
// The number of bytes required to store the data of this Tensor. I.e.
|
||||
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
|
||||
// type is kTfLiteFloat32 and dims = {3, 2} then
|
||||
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
|
||||
size_t bytes;
|
||||
|
||||
// An opaque pointer to a tflite::MMapAllocation
|
||||
const void* allocation;
|
||||
|
||||
// Null-terminated name of this tensor.
|
||||
const char* name;
|
||||
|
||||
// The delegate which knows how to handle `buffer_handle`.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
struct TfLiteDelegate* delegate;
|
||||
|
||||
// An integer buffer handle that can be handled by `delegate`.
|
||||
// The value is valid only when delegate is not null.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteBufferHandle buffer_handle;
|
||||
|
||||
// If the delegate uses its own buffer (e.g. GPU memory), the delegate is
|
||||
// responsible to set data_is_stale to true.
|
||||
// `delegate->CopyFromBufferHandle` can be called to copy the data from
|
||||
// delegate buffer.
|
||||
// WARNING: This is an // experimental interface that is subject to change.
|
||||
bool data_is_stale;
|
||||
|
||||
// True if the tensor is a variable.
|
||||
bool is_variable;
|
||||
|
||||
// Quantization information. Replaces params field above.
|
||||
TfLiteQuantization quantization;
|
||||
|
||||
// Parameters used to encode a sparse tensor.
|
||||
// This is optional. The field is NULL if a tensor is dense.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteSparsity* sparsity;
|
||||
|
||||
// Optional. Encodes shapes with unknown dimensions with -1. This field is
|
||||
// only populated when unknown dimensions exist in a read-write tensor (i.e.
|
||||
// an input or output tensor). (e.g. `dims` contains [1, 1, 1, 3] and
|
||||
// `dims_signature` contains [1, -1, -1, 3]).
|
||||
const TfLiteIntArray* dims_signature;
|
||||
} TfLiteTensor;
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
// Free data memory of tensor `t`.
|
||||
void TfLiteTensorDataFree(TfLiteTensor* t);
|
||||
|
||||
// Free quantization data.
|
||||
void TfLiteQuantizationFree(TfLiteQuantization* quantization);
|
||||
|
||||
// Free sparsity parameters.
|
||||
void TfLiteSparsityFree(TfLiteSparsity* sparsity);
|
||||
|
||||
// Free memory of tensor `t`.
|
||||
void TfLiteTensorFree(TfLiteTensor* t);
|
||||
|
||||
// Set all of a tensor's fields (and free any previously allocated data).
|
||||
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
|
||||
TfLiteQuantizationParams quantization, char* buffer,
|
||||
size_t size, TfLiteAllocationType allocation_type,
|
||||
const void* allocation, bool is_variable,
|
||||
TfLiteTensor* tensor);
|
||||
|
||||
// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
|
||||
// types other than kTfLiteDynamic will be ignored.
|
||||
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
// A structure representing an instance of a node.
|
||||
// This structure only exhibits the inputs, outputs and user defined data, not
|
||||
// other features like the type.
|
||||
typedef struct TfLiteNode {
|
||||
// Inputs to this node expressed as indices into the simulator's tensors.
|
||||
TfLiteIntArray* inputs;
|
||||
|
||||
// Outputs to this node expressed as indices into the simulator's tensors.
|
||||
TfLiteIntArray* outputs;
|
||||
|
||||
// intermediate tensors to this node expressed as indices into the simulator's
|
||||
// tensors.
|
||||
TfLiteIntArray* intermediates;
|
||||
|
||||
// Temporary tensors uses during the computations. This usually contains no
|
||||
// tensors, but ops are allowed to change that if they need scratch space of
|
||||
// any sort.
|
||||
TfLiteIntArray* temporaries;
|
||||
|
||||
// Opaque data provided by the node implementer through `Registration.init`.
|
||||
void* user_data;
|
||||
|
||||
// Opaque data provided to the node if the node is a builtin. This is usually
|
||||
// a structure defined in builtin_op_data.h
|
||||
void* builtin_data;
|
||||
|
||||
// Custom initial data. This is the opaque data provided in the flatbuffer.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
const void* custom_initial_data;
|
||||
int custom_initial_data_size;
|
||||
|
||||
// The pointer to the delegate. This is non-null only when the node is
|
||||
// created by calling `interpreter.ModifyGraphWithDelegate`.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
struct TfLiteDelegate* delegate;
|
||||
} TfLiteNode;
|
||||
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
//
|
||||
// Currently, TfLiteDelegateParams has to be allocated in a way that it's
|
||||
// trivially destructable. It will be stored as `builtin_data` field in
|
||||
// `TfLiteNode` of the delegate node.
|
||||
//
|
||||
// See also the `CreateDelegateParams` function in `interpreter.cc` details.
|
||||
typedef struct TfLiteDelegateParams {
|
||||
struct TfLiteDelegate* delegate;
|
||||
TfLiteIntArray* nodes_to_replace;
|
||||
TfLiteIntArray* input_tensors;
|
||||
TfLiteIntArray* output_tensors;
|
||||
} TfLiteDelegateParams;
|
||||
|
||||
typedef struct TfLiteContext {
|
||||
// Number of tensors in the context.
|
||||
size_t tensors_size;
|
||||
|
||||
// The execution plan contains a list of the node indices in execution
|
||||
// order. execution_plan->size is the current number of nodes. And,
|
||||
// execution_plan->data[0] is the first node that needs to be run.
|
||||
// TfLiteDelegates can traverse the current execution plan by iterating
|
||||
// through each member of this array and using GetNodeAndRegistration() to
|
||||
// access details about a node. i.e.
|
||||
// TfLiteIntArray* execution_plan;
|
||||
// TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
|
||||
// for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
|
||||
// int node_index = execution_plan->data[exec_index];
|
||||
// TfLiteNode* node;
|
||||
// TfLiteRegistration* reg;
|
||||
// context->GetNodeAndRegistration(context, node_index, &node, ®);
|
||||
// }
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
|
||||
TfLiteIntArray** execution_plan);
|
||||
|
||||
// An array of tensors in the interpreter context (of length `tensors_size`)
|
||||
TfLiteTensor* tensors;
|
||||
|
||||
// opaque full context ptr (an opaque c++ data structure)
|
||||
void* impl_;
|
||||
|
||||
// Request memory pointer be resized. Updates dimensions on the tensor.
|
||||
// NOTE: ResizeTensor takes ownership of newSize.
|
||||
TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor,
|
||||
TfLiteIntArray* new_size);
|
||||
// Request that an error be reported with format string msg.
|
||||
void (*ReportError)(struct TfLiteContext*, const char* msg, ...);
|
||||
|
||||
// Add `tensors_to_add` tensors, preserving pre-existing Tensor entries. If
|
||||
// non-null, the value pointed to by `first_new_tensor_index` will be set to
|
||||
// the index of the first new tensor.
|
||||
TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add,
|
||||
int* first_new_tensor_index);
|
||||
|
||||
// Get a Tensor node by node_index.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteStatus (*GetNodeAndRegistration)(
|
||||
struct TfLiteContext*, int node_index, TfLiteNode** node,
|
||||
struct TfLiteRegistration** registration);
|
||||
|
||||
// Replace ops with one or more stub delegate operations. This function
|
||||
// does not take ownership of `nodes_to_replace`.
|
||||
TfLiteStatus (*ReplaceNodeSubsetsWithDelegateKernels)(
|
||||
struct TfLiteContext*, struct TfLiteRegistration registration,
|
||||
const TfLiteIntArray* nodes_to_replace, struct TfLiteDelegate* delegate);
|
||||
|
||||
// Number of threads that are recommended to subsystems like gemmlowp and
|
||||
// eigen.
|
||||
int recommended_num_threads;
|
||||
|
||||
// Access external contexts by type.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteExternalContext* (*GetExternalContext)(struct TfLiteContext*,
|
||||
TfLiteExternalContextType);
|
||||
// Set the value of a external context. Does not take ownership of the
|
||||
// pointer.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
|
||||
TfLiteExternalContext*);
|
||||
|
||||
// Flag for allowing float16 precision for FP32 calculation.
|
||||
// default: false.
|
||||
// WARNING: This is an experimental API and subject to change.
|
||||
bool allow_fp32_relax_to_fp16;
|
||||
|
||||
// Pointer to the op-level profiler, if set; nullptr otherwise.
|
||||
void* profiler;
|
||||
|
||||
// Allocate persistent buffer which has the same life time as the interpreter.
|
||||
// The memory is allocated from heap for TFL, and from tail in TFLM.
|
||||
// If *ptr is not nullptr, the pointer will be reallocated.
|
||||
// This method is only available in Prepare stage.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteStatus (*AllocatePersistentBuffer)(struct TfLiteContext* ctx,
|
||||
size_t bytes, void** ptr);
|
||||
|
||||
// Allocate a buffer which will be deallocated right after invoke phase.
|
||||
// The memory is allocated from heap in TFL, and from volatile arena in TFLM.
|
||||
// This method is only available in invoke stage.
|
||||
// NOTE: If possible use RequestScratchBufferInArena method to avoid memory
|
||||
// allocation during inference time.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteStatus (*AllocateBufferForEval)(struct TfLiteContext* ctx, size_t bytes,
|
||||
void** ptr);
|
||||
|
||||
// Request a scratch buffer in the arena through static memory planning.
|
||||
// This method is only available in Prepare stage and the buffer is allocated
|
||||
// by the interpreter between Prepare and Eval stage. In Eval stage,
|
||||
// GetScratchBuffer API can be used to fetch the address.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteStatus (*RequestScratchBufferInArena)(struct TfLiteContext* ctx,
|
||||
size_t bytes, int* buffer_idx);
|
||||
|
||||
// Get the scratch buffer pointer.
|
||||
// This method is only available in Eval stage.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
void* (*GetScratchBuffer)(struct TfLiteContext* ctx, int buffer_idx);
|
||||
|
||||
// Resize the memory pointer of the `tensor`. This method behaves the same as
|
||||
// `ResizeTensor`, except that it makes a copy of the shape array internally
|
||||
// so the shape array could be deallocated right afterwards.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteStatus (*ResizeTensorExplicit)(struct TfLiteContext* ctx,
|
||||
TfLiteTensor* tensor, int dims,
|
||||
const int* shape);
|
||||
|
||||
// This method provides a preview of post-delegation partitioning. Each
|
||||
// TfLiteDelegateParams in the referenced array corresponds to one instance of
|
||||
// the delegate kernel.
|
||||
// Example usage:
|
||||
//
|
||||
// TfLiteIntArray* nodes_to_replace = ...;
|
||||
// TfLiteDelegateParams* params_array;
|
||||
// int num_partitions = 0;
|
||||
// TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
|
||||
// context, delegate, nodes_to_replace, ¶ms_array, &num_partitions));
|
||||
// for (int idx = 0; idx < num_partitions; idx++) {
|
||||
// const auto& partition_params = params_array[idx];
|
||||
// ...
|
||||
// }
|
||||
//
|
||||
// NOTE: The context owns the memory referenced by partition_params_array. It
|
||||
// will be cleared with another call to PreviewDelegatePartitioning, or after
|
||||
// TfLiteDelegateParams::Prepare returns.
|
||||
//
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
TfLiteStatus (*PreviewDelegatePartitioning)(
|
||||
struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
|
||||
TfLiteDelegateParams** partition_params_array, int* num_partitions);
|
||||
} TfLiteContext;
|
||||
|
||||
typedef struct TfLiteRegistration {
|
||||
// Initializes the op from serialized data.
|
||||
// If a built-in op:
|
||||
// `buffer` is the op's params data (TfLiteLSTMParams*).
|
||||
// `length` is zero.
|
||||
// If custom op:
|
||||
// `buffer` is the op's `custom_options`.
|
||||
// `length` is the size of the buffer.
|
||||
//
|
||||
// Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer
|
||||
// or an instance of a struct).
|
||||
//
|
||||
// The returned pointer will be stored with the node in the `user_data` field,
|
||||
// accessible within prepare and invoke functions below.
|
||||
// NOTE: if the data is already in the desired format, simply implement this
|
||||
// function to return `nullptr` and implement the free function to be a no-op.
|
||||
void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
|
||||
|
||||
// The pointer `buffer` is the data previously returned by an init invocation.
|
||||
void (*free)(TfLiteContext* context, void* buffer);
|
||||
|
||||
// prepare is called when the inputs this node depends on have been resized.
|
||||
// context->ResizeTensor() can be called to request output tensors to be
|
||||
// resized.
|
||||
//
|
||||
// Returns kTfLiteOk on success.
|
||||
TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
|
||||
|
||||
// Execute the node (should read node->inputs and output to node->outputs).
|
||||
// Returns kTfLiteOk on success.
|
||||
TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
|
||||
|
||||
// profiling_string is called during summarization of profiling information
|
||||
// in order to group executions together. Providing a value here will cause a
|
||||
// given op to appear multiple times in the profiling report. This is
|
||||
// particularly useful for custom ops that can perform significantly
|
||||
// different calculations depending on their `user_data`.
|
||||
const char* (*profiling_string)(const TfLiteContext* context,
|
||||
const TfLiteNode* node);
|
||||
|
||||
// Builtin codes. If this kernel refers to a builtin this is the code
|
||||
// of the builtin. This is so we can do marshaling to other frameworks like
|
||||
// NN API.
|
||||
// Note: It is the responsibility of the registration binder to set this
|
||||
// properly.
|
||||
int32_t builtin_code;
|
||||
|
||||
// Custom op name. If the op is a builtin, this will be null.
|
||||
// Note: It is the responsibility of the registration binder to set this
|
||||
// properly.
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
const char* custom_name;
|
||||
|
||||
// The version of the op.
|
||||
// Note: It is the responsibility of the registration binder to set this
|
||||
// properly.
|
||||
int version;
|
||||
} TfLiteRegistration;
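
// Example sketch of a registration for a hypothetical custom op "MY_OP".
// MyOpInit, MyOpFree, MyOpPrepare and MyOpEval are assumed user-provided
// functions matching the signatures above; the remaining fields are
// zero-initialized.
//
//   TfLiteRegistration* Register_MY_OP() {
//     static TfLiteRegistration r = {MyOpInit, MyOpFree, MyOpPrepare,
//                                    MyOpEval};
//     return &r;
//   }
//
// An op resolver implementation can then map the custom name "MY_OP" to the
// registration returned by Register_MY_OP().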
|
||||
|
||||
// The flags used in `TfLiteDelegate`. Note that this is a bitmask, so the
|
||||
// values should be 1, 2, 4, 8, ...etc.
|
||||
typedef enum TfLiteDelegateFlags {
|
||||
kTfLiteDelegateFlagsNone = 0,
|
||||
// The flag is set if the delegate can handle dynamic sized tensors.
|
||||
// For example, the output shape of a `Resize` op with non-constant shape
|
||||
// can only be inferred when the op is invoked.
|
||||
// In this case, the Delegate is responsible for calling
|
||||
// `SetTensorToDynamic` to mark the tensor as a dynamic tensor, and calling
|
||||
// `ResizeTensor` when invoking the op.
|
||||
//
|
||||
// If the delegate isn't capable of handling dynamic tensors, this flag needs
// to be set to false.
|
||||
kTfLiteDelegateFlagsAllowDynamicTensors = 1
|
||||
} TfLiteDelegateFlags;
|
||||
|
||||
// WARNING: This is an experimental interface that is subject to change.
|
||||
typedef struct TfLiteDelegate {
|
||||
// Data that the delegate needs to identify itself. This data is owned by the
// delegate. The delegate is owned in the user code, so the delegate is
// responsible for freeing this data when it is destroyed.
|
||||
void* data_;
|
||||
|
||||
// Invoked by ModifyGraphWithDelegate. This prepare is called, giving the
|
||||
// delegate a view of the current graph through TfLiteContext*. It typically
|
||||
// will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels()
|
||||
// to ask the TensorFlow Lite runtime to create macro-nodes to represent
|
||||
// delegated subgraphs of the original graph.
|
||||
TfLiteStatus (*Prepare)(TfLiteContext* context,
|
||||
struct TfLiteDelegate* delegate);
|
||||
|
||||
// Copy the data from delegate buffer handle into raw memory of the given
|
||||
// 'tensor'. This cannot be null. The delegate is allowed to allocate the raw
|
||||
// bytes as long as it follows the rules for kTfLiteDynamic tensors.
|
||||
TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
|
||||
struct TfLiteDelegate* delegate,
|
||||
TfLiteBufferHandle buffer_handle,
|
||||
TfLiteTensor* tensor);
|
||||
|
||||
// Copy the data from raw memory of the given 'tensor' to delegate buffer
|
||||
// handle. This can be null if the delegate doesn't use its own buffer.
|
||||
TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context,
|
||||
struct TfLiteDelegate* delegate,
|
||||
TfLiteBufferHandle buffer_handle,
|
||||
TfLiteTensor* tensor);
|
||||
|
||||
// Free the Delegate Buffer Handle. Note: This only frees the handle, but
|
||||
// this doesn't release the underlying resource (e.g. textures). The
|
||||
// resources are either owned by the application layer or the delegate.
|
||||
// This can be null if the delegate doesn't use its own buffer.
|
||||
void (*FreeBufferHandle)(TfLiteContext* context,
|
||||
struct TfLiteDelegate* delegate,
|
||||
TfLiteBufferHandle* handle);
|
||||
|
||||
// Bitmask flags. See the comments in `TfLiteDelegateFlags`.
|
||||
int64_t flags;
|
||||
} TfLiteDelegate;
|
||||
|
||||
// Build a 'null' delegate, with all the fields properly set to their default
|
||||
// values.
|
||||
TfLiteDelegate TfLiteDelegateCreate();
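
// Example sketch of wiring up a minimal delegate. MyDelegatePrepare is an
// assumed user-provided function matching the Prepare signature above; the
// buffer-handle callbacks would be filled in the same way when used.
//
//   TfLiteDelegate delegate = TfLiteDelegateCreate();
//   delegate.data_ = nullptr;  // or a pointer to backend-specific state
//   delegate.flags = kTfLiteDelegateFlagsNone;
//   delegate.Prepare = MyDelegatePrepare;
//   // The delegate is then handed to the runtime (e.g. through
//   // ModifyGraphWithDelegate), which calls Prepare on the current graph.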
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
#endif // TENSORFLOW_LITE_C_COMMON_H_
38
code/lib/tfmicro/tensorflow/lite/core/api/error_reporter.cc
Normal file
@@ -0,0 +1,38 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include <cstdarg>
|
||||
|
||||
namespace tflite {
|
||||
|
||||
int ErrorReporter::Report(const char* format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
int code = Report(format, args);
|
||||
va_end(args);
|
||||
return code;
|
||||
}
|
||||
|
||||
// TODO(aselle): Make the name of ReportError on context the same, so
|
||||
// we can use the ensure functions w/o a context and w/ a reporter.
|
||||
int ErrorReporter::ReportError(void*, const char* format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
int code = Report(format, args);
|
||||
va_end(args);
|
||||
return code;
|
||||
}
|
||||
|
||||
} // namespace tflite
59
code/lib/tfmicro/tensorflow/lite/core/api/error_reporter.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
|
||||
#define TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
|
||||
|
||||
#include <cstdarg>
|
||||
|
||||
namespace tflite {
|
||||
|
||||
/// A functor that reports errors to the supporting system. Invoked similarly
/// to printf.
|
||||
///
|
||||
/// Usage:
|
||||
/// ErrorReporter foo;
|
||||
/// foo.Report("test %d", 5);
|
||||
/// or
|
||||
/// va_list args;
|
||||
/// foo.Report("test %d", args); // where args is va_list
|
||||
///
|
||||
/// Subclass ErrorReporter to provide another reporting destination.
|
||||
/// For example, if you have a GUI program, you might redirect to a buffer
|
||||
/// that drives a GUI error log box.
|
||||
class ErrorReporter {
|
||||
public:
|
||||
virtual ~ErrorReporter() {}
|
||||
virtual int Report(const char* format, va_list args) = 0;
|
||||
int Report(const char* format, ...);
|
||||
int ReportError(void*, const char* format, ...);
|
||||
};
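
/// Example sketch of an alternative reporting destination: a subclass that
/// forwards every message to stderr (assumes <cstdio> is available).
///
///   class StderrReporter : public ErrorReporter {
///    public:
///     int Report(const char* format, va_list args) override {
///       return vfprintf(stderr, format, args);
///     }
///   };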
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
// You should not make bare calls to the error reporter; instead, use the
|
||||
// TF_LITE_REPORT_ERROR macro, since this allows message strings to be
|
||||
// stripped when the binary size has to be optimized. If you are looking to
|
||||
// reduce binary size, define TF_LITE_STRIP_ERROR_STRINGS when compiling and
|
||||
// every call will be stubbed out, taking no memory.
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
#define TF_LITE_REPORT_ERROR(reporter, ...) \
|
||||
do { \
|
||||
static_cast<tflite::ErrorReporter*>(reporter)->Report(__VA_ARGS__); \
|
||||
} while (false)
|
||||
#else // TF_LITE_STRIP_ERROR_STRINGS
|
||||
#define TF_LITE_REPORT_ERROR(reporter, ...)
|
||||
#endif // TF_LITE_STRIP_ERROR_STRINGS
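//
// Example sketch of the intended call pattern, where `reporter` is any
// tflite::ErrorReporter*:
//
//   TF_LITE_REPORT_ERROR(reporter, "Tensor index %d out of range", index);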
|
||||
|
||||
#endif // TENSORFLOW_LITE_CORE_API_ERROR_REPORTER_H_
|
||||
@@ -0,0 +1,919 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace {
|
||||
|
||||
// Utility class for safely allocating POD data. This is useful for avoiding
|
||||
// leaks in cases where op params are allocated but fail to propagate to the
|
||||
// parsed op data (e.g., when model parameters are invalid).
|
||||
class SafeBuiltinDataAllocator {
|
||||
public:
|
||||
class BuiltinDataDeleter {
|
||||
public:
|
||||
explicit BuiltinDataDeleter(BuiltinDataAllocator* allocator)
|
||||
: allocator_(allocator) {}
|
||||
|
||||
void operator()(void* data) { allocator_->Deallocate(data); }
|
||||
|
||||
private:
|
||||
BuiltinDataAllocator* allocator_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using BuiltinDataPtr = std::unique_ptr<T, BuiltinDataDeleter>;
|
||||
|
||||
explicit SafeBuiltinDataAllocator(BuiltinDataAllocator* allocator)
|
||||
: allocator_(allocator) {}
|
||||
|
||||
template <typename T>
|
||||
BuiltinDataPtr<T> Allocate() {
|
||||
return BuiltinDataPtr<T>(allocator_->AllocatePOD<T>(),
|
||||
BuiltinDataDeleter(allocator_));
|
||||
}
|
||||
|
||||
private:
|
||||
BuiltinDataAllocator* allocator_;
|
||||
};
|
||||
|
||||
// Copies the contents of the flatbuffer int vector `flat_vector` into the
// int array `buffer`. `flat_vector` and `buffer` represent the same
// configuration for a given operation.
|
||||
TfLiteStatus FlatBufferIntVectorToArray(
|
||||
int max_size_of_buffer, const flatbuffers::Vector<int32_t>* flat_vector,
|
||||
int* buffer, ErrorReporter* error_reporter, const char* op_name) {
|
||||
if (!flat_vector) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter,
|
||||
"Input array not provided for operation '%s'.\n",
|
||||
op_name);
|
||||
return kTfLiteError;
|
||||
} else {
|
||||
size_t num_dimensions = flat_vector->size();
|
||||
if (num_dimensions > max_size_of_buffer / sizeof(int)) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter,
|
||||
"Found too many dimensions in the input array of operation '%s'.\n",
|
||||
op_name);
|
||||
return kTfLiteError;
|
||||
} else {
|
||||
for (size_t i = 0; i < num_dimensions; ++i) {
|
||||
buffer[i] = flat_vector->Get(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
|
||||
ErrorReporter* error_reporter) {
|
||||
switch (tensor_type) {
|
||||
case TensorType_FLOAT16:
|
||||
*type = kTfLiteFloat16;
|
||||
return kTfLiteOk;
|
||||
case TensorType_FLOAT32:
|
||||
*type = kTfLiteFloat32;
|
||||
return kTfLiteOk;
|
||||
case TensorType_FLOAT64:
|
||||
*type = kTfLiteFloat64;
|
||||
return kTfLiteOk;
|
||||
case TensorType_INT16:
|
||||
*type = kTfLiteInt16;
|
||||
return kTfLiteOk;
|
||||
case TensorType_INT32:
|
||||
*type = kTfLiteInt32;
|
||||
return kTfLiteOk;
|
||||
case TensorType_UINT8:
|
||||
*type = kTfLiteUInt8;
|
||||
return kTfLiteOk;
|
||||
case TensorType_INT8:
|
||||
*type = kTfLiteInt8;
|
||||
return kTfLiteOk;
|
||||
case TensorType_INT64:
|
||||
*type = kTfLiteInt64;
|
||||
return kTfLiteOk;
|
||||
case TensorType_STRING:
|
||||
*type = kTfLiteString;
|
||||
return kTfLiteOk;
|
||||
case TensorType_BOOL:
|
||||
*type = kTfLiteBool;
|
||||
return kTfLiteOk;
|
||||
case TensorType_COMPLEX64:
|
||||
*type = kTfLiteComplex64;
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
*type = kTfLiteNoType;
|
||||
TF_LITE_REPORT_ERROR(error_reporter,
|
||||
"Unsupported data type %d in tensor\n", tensor_type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the appropriate data out of the op.
|
||||
//
|
||||
// This handles builtin data explicitly as there are flatbuffer schemas.
|
||||
// If it returns kTfLiteOk, it passes the data out with `builtin_data`, which
|
||||
// needs to be released by calling `free`.
|
||||
// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
|
||||
TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data) {
|
||||
auto parse_padding = [](Padding padding) {
|
||||
switch (padding) {
|
||||
case Padding_SAME:
|
||||
return kTfLitePaddingSame;
|
||||
case Padding_VALID:
|
||||
return kTfLitePaddingValid;
|
||||
}
|
||||
return kTfLitePaddingUnknown;
|
||||
};
|
||||
auto parse_activation = [](ActivationFunctionType activation) {
|
||||
switch (activation) {
|
||||
case ActivationFunctionType_NONE:
|
||||
return kTfLiteActNone;
|
||||
case ActivationFunctionType_RELU:
|
||||
return kTfLiteActRelu;
|
||||
case ActivationFunctionType_RELU_N1_TO_1:
|
||||
return kTfLiteActRelu1;
|
||||
case ActivationFunctionType_RELU6:
|
||||
return kTfLiteActRelu6;
|
||||
case ActivationFunctionType_TANH:
|
||||
return kTfLiteActTanh;
|
||||
case ActivationFunctionType_SIGN_BIT:
|
||||
return kTfLiteActSignBit;
|
||||
}
|
||||
return kTfLiteActNone;
|
||||
};
|
||||
auto parseLSHProjectionType = [](LSHProjectionType type) {
|
||||
switch (type) {
|
||||
case LSHProjectionType_SPARSE:
|
||||
return kTfLiteLshProjectionSparse;
|
||||
case LSHProjectionType_DENSE:
|
||||
return kTfLiteLshProjectionDense;
|
||||
default:
|
||||
return kTfLiteLshProjectionUnknown;
|
||||
}
|
||||
};
|
||||
auto parseCombinerType = [](CombinerType type) {
|
||||
switch (type) {
|
||||
case CombinerType_MEAN:
|
||||
return kTfLiteCombinerTypeMean;
|
||||
case CombinerType_SQRTN:
|
||||
return kTfLiteCombinerTypeSqrtn;
|
||||
case CombinerType_SUM:
|
||||
default:
|
||||
return kTfLiteCombinerTypeSum;
|
||||
}
|
||||
};
|
||||
|
||||
SafeBuiltinDataAllocator safe_allocator(allocator);
|
||||
*builtin_data = nullptr;
|
||||
switch (op_type) {
|
||||
case BuiltinOperator_CONV_2D: {
|
||||
auto params = safe_allocator.Allocate<TfLiteConvParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (auto* conv_params = op->builtin_options_as_Conv2DOptions()) {
|
||||
params->padding = parse_padding(conv_params->padding());
|
||||
params->stride_width = conv_params->stride_w();
|
||||
params->stride_height = conv_params->stride_h();
|
||||
params->activation =
|
||||
parse_activation(conv_params->fused_activation_function());
|
||||
|
||||
params->dilation_width_factor = conv_params->dilation_w_factor();
|
||||
params->dilation_height_factor = conv_params->dilation_h_factor();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_CAST: {
|
||||
auto params = safe_allocator.Allocate<TfLiteCastParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_CastOptions()) {
|
||||
TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->in_data_type(),
|
||||
&params->in_data_type,
|
||||
error_reporter));
|
||||
TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->out_data_type(),
|
||||
&params->out_data_type,
|
||||
error_reporter));
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_LSH_PROJECTION: {
|
||||
auto params = safe_allocator.Allocate<TfLiteLSHProjectionParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* lshParams =
|
||||
op->builtin_options_as_LSHProjectionOptions()) {
|
||||
params->type = parseLSHProjectionType(lshParams->type());
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_AVERAGE_POOL_2D:
|
||||
case BuiltinOperator_MAX_POOL_2D:
|
||||
case BuiltinOperator_L2_POOL_2D: {
|
||||
auto params = safe_allocator.Allocate<TfLitePoolParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* pool_params = op->builtin_options_as_Pool2DOptions()) {
|
||||
params->padding = parse_padding(pool_params->padding());
|
||||
params->stride_width = pool_params->stride_w();
|
||||
params->stride_height = pool_params->stride_h();
|
||||
params->filter_width = pool_params->filter_width();
|
||||
params->filter_height = pool_params->filter_height();
|
||||
params->activation =
|
||||
parse_activation(pool_params->fused_activation_function());
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_DEPTHWISE_CONV_2D: {
|
||||
auto params = safe_allocator.Allocate<TfLiteDepthwiseConvParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* conv_params =
|
||||
op->builtin_options_as_DepthwiseConv2DOptions()) {
|
||||
params->padding = parse_padding(conv_params->padding());
|
||||
params->stride_width = conv_params->stride_w();
|
||||
params->stride_height = conv_params->stride_h();
|
||||
params->depth_multiplier = conv_params->depth_multiplier();
|
||||
params->activation =
|
||||
parse_activation(conv_params->fused_activation_function());
|
||||
|
||||
params->dilation_width_factor = conv_params->dilation_w_factor();
|
||||
params->dilation_height_factor = conv_params->dilation_h_factor();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SVDF: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSVDFParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* svdf_params = op->builtin_options_as_SVDFOptions()) {
|
||||
params->rank = svdf_params->rank();
|
||||
params->activation =
|
||||
parse_activation(svdf_params->fused_activation_function());
|
||||
params->asymmetric_quantize_inputs =
|
||||
svdf_params->asymmetric_quantize_inputs();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSequenceRNNParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* sequence_rnn_params =
|
||||
op->builtin_options_as_SequenceRNNOptions()) {
|
||||
params->activation =
|
||||
parse_activation(sequence_rnn_params->fused_activation_function());
|
||||
params->time_major = sequence_rnn_params->time_major();
|
||||
params->asymmetric_quantize_inputs =
|
||||
sequence_rnn_params->asymmetric_quantize_inputs();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: {
|
||||
auto params =
|
||||
safe_allocator.Allocate<TfLiteBidirectionalSequenceRNNParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* bidi_sequence_rnn_params =
|
||||
op->builtin_options_as_BidirectionalSequenceRNNOptions()) {
|
||||
params->activation = parse_activation(
|
||||
bidi_sequence_rnn_params->fused_activation_function());
|
||||
params->time_major = bidi_sequence_rnn_params->time_major();
|
||||
params->merge_outputs = bidi_sequence_rnn_params->merge_outputs();
|
||||
params->asymmetric_quantize_inputs =
|
||||
bidi_sequence_rnn_params->asymmetric_quantize_inputs();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_RNN: {
|
||||
auto params = safe_allocator.Allocate<TfLiteRNNParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* rnn_params = op->builtin_options_as_RNNOptions()) {
|
||||
params->activation =
|
||||
parse_activation(rnn_params->fused_activation_function());
|
||||
params->asymmetric_quantize_inputs =
|
||||
rnn_params->asymmetric_quantize_inputs();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: {
|
||||
auto params =
|
||||
safe_allocator.Allocate<TfLiteEmbeddingLookupSparseParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* embedding_params =
|
||||
op->builtin_options_as_EmbeddingLookupSparseOptions()) {
|
||||
params->combiner = parseCombinerType(embedding_params->combiner());
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_FULLY_CONNECTED: {
|
||||
auto params = safe_allocator.Allocate<TfLiteFullyConnectedParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* fully_connected_params =
|
||||
op->builtin_options_as_FullyConnectedOptions()) {
|
||||
params->activation = parse_activation(
|
||||
fully_connected_params->fused_activation_function());
|
||||
params->keep_num_dims = fully_connected_params->keep_num_dims();
|
||||
params->asymmetric_quantize_inputs =
|
||||
fully_connected_params->asymmetric_quantize_inputs();
|
||||
switch (fully_connected_params->weights_format()) {
|
||||
case FullyConnectedOptionsWeightsFormat_DEFAULT:
|
||||
params->weights_format = kTfLiteFullyConnectedWeightsFormatDefault;
|
||||
break;
|
||||
case FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8:
|
||||
params->weights_format =
|
||||
kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8;
|
||||
break;
|
||||
default:
|
||||
TF_LITE_REPORT_ERROR(error_reporter,
|
||||
"Unhandled fully-connected weights format.");
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_HASHTABLE_LOOKUP:
|
||||
// no-op.
|
||||
return kTfLiteOk;
|
||||
case BuiltinOperator_SOFTMAX: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSoftmaxParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* softmax_params =
|
||||
op->builtin_options_as_SoftmaxOptions()) {
|
||||
params->beta = softmax_params->beta();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_CONCATENATION: {
|
||||
auto params = safe_allocator.Allocate<TfLiteConcatenationParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* concatenation_params =
|
||||
op->builtin_options_as_ConcatenationOptions()) {
|
||||
params->activation =
|
||||
parse_activation(concatenation_params->fused_activation_function());
|
||||
params->axis = concatenation_params->axis();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_MUL: {
|
||||
auto params = safe_allocator.Allocate<TfLiteMulParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_MulOptions()) {
|
||||
params->activation =
|
||||
parse_activation(schema_params->fused_activation_function());
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_ADD: {
|
||||
auto params = safe_allocator.Allocate<TfLiteAddParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_AddOptions()) {
|
||||
params->activation =
|
||||
parse_activation(schema_params->fused_activation_function());
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_DIV: {
|
||||
auto params = safe_allocator.Allocate<TfLiteDivParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_DivOptions()) {
|
||||
params->activation =
|
||||
parse_activation(schema_params->fused_activation_function());
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SUB: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSubParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_SubOptions()) {
|
||||
params->activation =
|
||||
parse_activation(schema_params->fused_activation_function());
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_L2_NORMALIZATION: {
|
||||
auto params = safe_allocator.Allocate<TfLiteL2NormParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_L2NormOptions()) {
|
||||
params->activation =
|
||||
parse_activation(schema_params->fused_activation_function());
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: {
|
||||
auto params = safe_allocator.Allocate<TfLiteLocalResponseNormParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params =
|
||||
op->builtin_options_as_LocalResponseNormalizationOptions()) {
|
||||
params->radius = schema_params->radius();
|
||||
params->bias = schema_params->bias();
|
||||
params->alpha = schema_params->alpha();
|
||||
params->beta = schema_params->beta();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_LSTM: {
|
||||
auto params = safe_allocator.Allocate<TfLiteLSTMParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
|
||||
params->activation =
|
||||
parse_activation(lstm_params->fused_activation_function());
|
||||
params->cell_clip = lstm_params->cell_clip();
|
||||
params->proj_clip = lstm_params->proj_clip();
|
||||
switch (lstm_params->kernel_type()) {
|
||||
case LSTMKernelType_FULL:
|
||||
params->kernel_type = kTfLiteLSTMFullKernel;
|
||||
break;
|
||||
case LSTMKernelType_BASIC:
|
||||
params->kernel_type = kTfLiteLSTMBasicKernel;
|
||||
break;
|
||||
default:
|
||||
TF_LITE_REPORT_ERROR(error_reporter,
|
||||
"Unhandled LSTM kernel type: %d",
|
||||
lstm_params->kernel_type());
|
||||
return kTfLiteError;
|
||||
}
|
||||
params->asymmetric_quantize_inputs =
|
||||
lstm_params->asymmetric_quantize_inputs();
|
||||
} else {
|
||||
TF_LITE_REPORT_ERROR(error_reporter,
|
||||
"No valid LSTM builtin options exist");
|
||||
return kTfLiteError;
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
|
||||
auto params =
|
||||
safe_allocator.Allocate<TfLiteUnidirectionalSequenceLSTMParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* seq_lstm_params =
|
||||
op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) {
|
||||
params->activation =
|
||||
parse_activation(seq_lstm_params->fused_activation_function());
|
||||
params->cell_clip = seq_lstm_params->cell_clip();
|
||||
params->proj_clip = seq_lstm_params->proj_clip();
|
||||
params->time_major = seq_lstm_params->time_major();
|
||||
params->asymmetric_quantize_inputs =
|
||||
seq_lstm_params->asymmetric_quantize_inputs();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: {
|
||||
auto params =
|
||||
safe_allocator.Allocate<TfLiteBidirectionalSequenceLSTMParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* bidi_lstm_params =
|
||||
op->builtin_options_as_BidirectionalSequenceLSTMOptions()) {
|
||||
params->activation =
|
||||
parse_activation(bidi_lstm_params->fused_activation_function());
|
||||
params->cell_clip = bidi_lstm_params->cell_clip();
|
||||
params->proj_clip = bidi_lstm_params->proj_clip();
|
||||
params->merge_outputs = bidi_lstm_params->merge_outputs();
|
||||
params->time_major = bidi_lstm_params->time_major();
|
||||
params->asymmetric_quantize_inputs =
|
||||
bidi_lstm_params->asymmetric_quantize_inputs();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_RESIZE_BILINEAR: {
|
||||
auto params = safe_allocator.Allocate<TfLiteResizeBilinearParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params =
|
||||
op->builtin_options_as_ResizeBilinearOptions()) {
|
||||
params->align_corners = schema_params->align_corners();
|
||||
params->half_pixel_centers = schema_params->half_pixel_centers();
|
||||
} else {
|
||||
// Some older models did not populate the ResizeBilinearOptions field in
|
||||
// the flatbuffer, so ensure it's set to a sensible default.
|
||||
params->align_corners = false;
|
||||
params->half_pixel_centers = false;
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: {
|
||||
auto params =
|
||||
safe_allocator.Allocate<TfLiteResizeNearestNeighborParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params =
|
||||
op->builtin_options_as_ResizeNearestNeighborOptions()) {
|
||||
params->align_corners = schema_params->align_corners();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_RESHAPE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteReshapeParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_ReshapeOptions()) {
|
||||
auto* new_shape = schema_params->new_shape();
|
||||
// TODO(b/147203660): We need to figure out when dynamic reshape
|
||||
// (new_shape is a tensor) happens and why the option is not a nullptr.
// But nonetheless, we should only copy when new_shape is not a nullptr.
|
||||
if (new_shape) {
|
||||
TF_LITE_ENSURE_STATUS(FlatBufferIntVectorToArray(
|
||||
sizeof(params->shape), new_shape, params->shape, error_reporter,
|
||||
"reshape"));
|
||||
params->num_dimensions = new_shape->size();
|
||||
}
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SKIP_GRAM: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSkipGramParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* skip_gram_params =
|
||||
op->builtin_options_as_SkipGramOptions()) {
|
||||
params->ngram_size = skip_gram_params->ngram_size();
|
||||
params->max_skip_size = skip_gram_params->max_skip_size();
|
||||
params->include_all_ngrams = skip_gram_params->include_all_ngrams();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SPACE_TO_DEPTH: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSpaceToDepthParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params =
|
||||
op->builtin_options_as_SpaceToDepthOptions()) {
|
||||
params->block_size = schema_params->block_size();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_DEPTH_TO_SPACE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteDepthToSpaceParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params =
|
||||
op->builtin_options_as_DepthToSpaceOptions()) {
|
||||
params->block_size = schema_params->block_size();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_GATHER: {
|
||||
auto params = safe_allocator.Allocate<TfLiteGatherParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
params->axis = 0;
|
||||
if (const auto* gather_params = op->builtin_options_as_GatherOptions()) {
|
||||
params->axis = gather_params->axis();
|
||||
}
|
||||
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_MEAN:
|
||||
case BuiltinOperator_REDUCE_MAX:
|
||||
case BuiltinOperator_REDUCE_MIN:
|
||||
case BuiltinOperator_REDUCE_PROD:
|
||||
case BuiltinOperator_REDUCE_ANY:
|
||||
case BuiltinOperator_SUM: {
|
||||
auto params = safe_allocator.Allocate<TfLiteReducerParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_ReducerOptions()) {
|
||||
params->keep_dims = schema_params->keep_dims();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SPLIT: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSplitParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_SplitOptions()) {
|
||||
params->num_splits = schema_params->num_splits();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SPLIT_V: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSplitParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_SplitVOptions()) {
|
||||
params->num_splits = schema_params->num_splits();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SQUEEZE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSqueezeParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_SqueezeOptions()) {
|
||||
const auto* squeeze_dims = schema_params->squeeze_dims();
|
||||
TF_LITE_ENSURE_STATUS(FlatBufferIntVectorToArray(
|
||||
sizeof(params->squeeze_dims), squeeze_dims, params->squeeze_dims,
|
||||
error_reporter, "squeeze"));
|
||||
params->num_squeeze_dims = squeeze_dims->size();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_STRIDED_SLICE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteStridedSliceParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params =
|
||||
op->builtin_options_as_StridedSliceOptions()) {
|
||||
params->begin_mask = schema_params->begin_mask();
|
||||
params->end_mask = schema_params->end_mask();
|
||||
params->ellipsis_mask = schema_params->ellipsis_mask();
|
||||
params->new_axis_mask = schema_params->new_axis_mask();
|
||||
params->shrink_axis_mask = schema_params->shrink_axis_mask();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_ARG_MAX: {
|
||||
auto params = safe_allocator.Allocate<TfLiteArgMaxParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_ArgMaxOptions()) {
|
||||
TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->output_type(),
|
||||
&params->output_type,
|
||||
error_reporter));
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_ARG_MIN: {
|
||||
auto params = safe_allocator.Allocate<TfLiteArgMinParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_ArgMinOptions()) {
|
||||
TF_LITE_ENSURE_STATUS(ConvertTensorType(schema_params->output_type(),
|
||||
&params->output_type,
|
||||
error_reporter));
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_TRANSPOSE_CONV: {
|
||||
auto params = safe_allocator.Allocate<TfLiteTransposeConvParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* transpose_conv_params =
|
||||
op->builtin_options_as_TransposeConvOptions()) {
|
||||
params->padding = parse_padding(transpose_conv_params->padding());
|
||||
params->stride_width = transpose_conv_params->stride_w();
|
||||
params->stride_height = transpose_conv_params->stride_h();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SPARSE_TO_DENSE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteSparseToDenseParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* sparse_to_dense_params =
|
||||
op->builtin_options_as_SparseToDenseOptions()) {
|
||||
params->validate_indices = sparse_to_dense_params->validate_indices();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_SHAPE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteShapeParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_ShapeOptions()) {
|
||||
TF_LITE_ENSURE_STATUS(ConvertTensorType(
|
||||
schema_params->out_type(), &params->out_type, error_reporter));
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_PACK: {
|
||||
auto params = safe_allocator.Allocate<TfLitePackParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* pack_params = op->builtin_options_as_PackOptions()) {
|
||||
params->values_count = pack_params->values_count();
|
||||
params->axis = pack_params->axis();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_DELEGATE: {
|
||||
// TODO(ycling): Revisit when supporting saving delegated models.
|
||||
TF_LITE_REPORT_ERROR(error_reporter,
|
||||
"DELEGATE op shouldn't exist in model.");
|
||||
return kTfLiteError;
|
||||
}
|
||||
case BuiltinOperator_FAKE_QUANT: {
|
||||
auto params = safe_allocator.Allocate<TfLiteFakeQuantParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params =
|
||||
op->builtin_options_as_FakeQuantOptions()) {
|
||||
params->min = schema_params->min();
|
||||
params->max = schema_params->max();
|
||||
params->num_bits = schema_params->num_bits();
|
||||
params->narrow_range = schema_params->narrow_range();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_ONE_HOT: {
|
||||
auto params = safe_allocator.Allocate<TfLiteOneHotParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* schema_params = op->builtin_options_as_OneHotOptions()) {
|
||||
params->axis = schema_params->axis();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_UNPACK: {
|
||||
auto params = safe_allocator.Allocate<TfLiteUnpackParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* unpack_params = op->builtin_options_as_UnpackOptions()) {
|
||||
params->num = unpack_params->num();
|
||||
params->axis = unpack_params->axis();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_LEAKY_RELU: {
|
||||
auto params = safe_allocator.Allocate<TfLiteLeakyReluParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* leaky_relu_params =
|
||||
op->builtin_options_as_LeakyReluOptions()) {
|
||||
params->alpha = leaky_relu_params->alpha();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_MIRROR_PAD: {
|
||||
auto params = safe_allocator.Allocate<TfLiteMirrorPaddingParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
const auto* mirror_pad_params = op->builtin_options_as_MirrorPadOptions();
|
||||
if (mirror_pad_params != nullptr) {
|
||||
params->mode =
|
||||
mirror_pad_params->mode() == tflite::MirrorPadMode_REFLECT
|
||||
? TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect
|
||||
: TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingSymmetric;
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_UNIQUE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteUniqueParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
const auto* unique_params = op->builtin_options_as_UniqueOptions();
|
||||
if (unique_params != nullptr) {
|
||||
params->index_out_type =
|
||||
unique_params->idx_out_type() == tflite::TensorType_INT64
|
||||
? TfLiteType::kTfLiteInt64
|
||||
: TfLiteType::kTfLiteInt32;
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_REVERSE_SEQUENCE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteReverseSequenceParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* reverse_seq_params =
|
||||
op->builtin_options_as_ReverseSequenceOptions()) {
|
||||
params->seq_dim = reverse_seq_params->seq_dim();
|
||||
params->batch_dim = reverse_seq_params->batch_dim();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_IF: {
|
||||
auto params = safe_allocator.Allocate<TfLiteIfParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* if_params = op->builtin_options_as_IfOptions()) {
|
||||
params->then_subgraph_index = if_params->then_subgraph_index();
|
||||
params->else_subgraph_index = if_params->else_subgraph_index();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_WHILE: {
|
||||
auto params = safe_allocator.Allocate<TfLiteWhileParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* while_params = op->builtin_options_as_WhileOptions()) {
|
||||
params->cond_subgraph_index = while_params->cond_subgraph_index();
|
||||
params->body_subgraph_index = while_params->body_subgraph_index();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case BuiltinOperator_BATCH_MATMUL: {
|
||||
auto params = safe_allocator.Allocate<TfLiteBatchMatMulParams>();
|
||||
TF_LITE_ENSURE(error_reporter, params != nullptr);
|
||||
if (const auto* bmm_params =
|
||||
op->builtin_options_as_BatchMatMulOptions()) {
|
||||
params->adj_x = bmm_params->adj_x();
|
||||
params->adj_y = bmm_params->adj_y();
|
||||
}
|
||||
*builtin_data = params.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
// Below are the ops with no builtin_data structure.
|
||||
case BuiltinOperator_ABS:
|
||||
case BuiltinOperator_BATCH_TO_SPACE_ND:
|
||||
// TODO(aselle): Implement call in BuiltinOptions, but nullptrs are
|
||||
// ok for now, since there is no call implementation either.
|
||||
case BuiltinOperator_CALL:
|
||||
case BuiltinOperator_CONCAT_EMBEDDINGS:
|
||||
case BuiltinOperator_COS:
|
||||
case BuiltinOperator_CUSTOM:
|
||||
case BuiltinOperator_DEQUANTIZE:
|
||||
case BuiltinOperator_ELU:
|
||||
case BuiltinOperator_EMBEDDING_LOOKUP:
|
||||
case BuiltinOperator_EQUAL:
|
||||
case BuiltinOperator_EXP:
|
||||
case BuiltinOperator_EXPAND_DIMS:
|
||||
case BuiltinOperator_CEIL:
|
||||
case BuiltinOperator_FLOOR:
|
||||
case BuiltinOperator_GREATER:
|
||||
case BuiltinOperator_GREATER_EQUAL:
|
||||
case BuiltinOperator_HARD_SWISH:
|
||||
case BuiltinOperator_LESS:
|
||||
case BuiltinOperator_LESS_EQUAL:
|
||||
case BuiltinOperator_LOG:
|
||||
case BuiltinOperator_LOGISTIC:
|
||||
case BuiltinOperator_LOG_SOFTMAX:
|
||||
case BuiltinOperator_MATRIX_DIAG:
|
||||
case BuiltinOperator_MATRIX_SET_DIAG:
|
||||
case BuiltinOperator_MAXIMUM:
|
||||
case BuiltinOperator_MINIMUM:
|
||||
case BuiltinOperator_NEG:
|
||||
case BuiltinOperator_NOT_EQUAL:
|
||||
case BuiltinOperator_PAD:
|
||||
case BuiltinOperator_PADV2:
|
||||
case BuiltinOperator_PRELU:
|
||||
case BuiltinOperator_RELU:
|
||||
case BuiltinOperator_RELU6:
|
||||
case BuiltinOperator_RELU_N1_TO_1:
|
||||
case BuiltinOperator_ROUND:
|
||||
case BuiltinOperator_RSQRT:
|
||||
case BuiltinOperator_SELECT:
|
||||
case BuiltinOperator_SELECT_V2:
|
||||
case BuiltinOperator_SIN:
|
||||
case BuiltinOperator_SLICE:
|
||||
case BuiltinOperator_SPACE_TO_BATCH_ND:
|
||||
case BuiltinOperator_SQRT:
|
||||
case BuiltinOperator_TANH:
|
||||
case BuiltinOperator_TILE:
|
||||
case BuiltinOperator_TOPK_V2:
|
||||
case BuiltinOperator_TRANSPOSE:
|
||||
case BuiltinOperator_POW:
|
||||
case BuiltinOperator_LOGICAL_OR:
|
||||
case BuiltinOperator_LOGICAL_AND:
|
||||
case BuiltinOperator_LOGICAL_NOT:
|
||||
case BuiltinOperator_FLOOR_DIV:
|
||||
case BuiltinOperator_SQUARE:
|
||||
case BuiltinOperator_ZEROS_LIKE:
|
||||
case BuiltinOperator_FILL:
|
||||
case BuiltinOperator_FLOOR_MOD:
|
||||
case BuiltinOperator_RANGE:
|
||||
case BuiltinOperator_SQUARED_DIFFERENCE:
|
||||
case BuiltinOperator_REVERSE_V2:
|
||||
case BuiltinOperator_ADD_N:
|
||||
case BuiltinOperator_GATHER_ND:
|
||||
case BuiltinOperator_WHERE:
|
||||
case BuiltinOperator_RANK:
|
||||
case BuiltinOperator_QUANTIZE:
|
||||
case BuiltinOperator_NON_MAX_SUPPRESSION_V4:
|
||||
case BuiltinOperator_NON_MAX_SUPPRESSION_V5:
|
||||
case BuiltinOperator_SCATTER_ND:
|
||||
case BuiltinOperator_DENSIFY:
|
||||
case BuiltinOperator_SEGMENT_SUM:
|
||||
return kTfLiteOk;
|
||||
}
|
||||
return kTfLiteError;
|
||||
} // NOLINT[readability/fn_size]
|
||||
|
||||
} // namespace tflite
|
||||
@@ -0,0 +1,71 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
|
||||
#define TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
|
||||
|
||||
// These functions transform codes and data structures that are defined in the
|
||||
// flatbuffer serialization format into in-memory values that are used by the
|
||||
// runtime API and interpreter.
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Interface class for builtin data allocations.
|
||||
class BuiltinDataAllocator {
|
||||
public:
|
||||
virtual void* Allocate(size_t size, size_t alignment_hint) = 0;
|
||||
virtual void Deallocate(void* data) = 0;
|
||||
|
||||
// Allocate a structure, but make sure it is a POD structure that doesn't
|
||||
// require constructors to run. The reason we do this is that Interpreter's C
|
||||
// extension part will take ownership so destructors will not be run during
|
||||
// deallocation.
|
||||
template <typename T>
|
||||
T* AllocatePOD() {
|
||||
// TODO(b/154346074): Change this to is_trivially_destructible when all
|
||||
// platform targets support that properly.
|
||||
static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
|
||||
void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
|
||||
return new (allocated_memory) T;
|
||||
}
|
||||
|
||||
virtual ~BuiltinDataAllocator() {}
|
||||
};
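
// Example sketch of a heap-backed implementation of this interface, suitable
// when builtin data can simply be released with free(). The alignment hint is
// ignored here for brevity.
//
//   class MallocDataAllocator : public BuiltinDataAllocator {
//    public:
//     void* Allocate(size_t size, size_t alignment_hint) override {
//       return malloc(size);
//     }
//     void Deallocate(void* data) override { free(data); }
//   };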
|
||||
|
||||
// Parse the appropriate data out of the op.
|
||||
//
|
||||
// This handles builtin data explicitly as there are flatbuffer schemas.
|
||||
// If it returns kTfLiteOk, it passes the data out with `builtin_data`. The
|
||||
// calling function has to pass in an allocator object, and this allocator
|
||||
// will be called to reserve space for the output data. If the calling
|
||||
// function's allocator reserves memory on the heap, then it's the calling
|
||||
// function's responsibility to free it.
|
||||
// If it returns kTfLiteError, `builtin_data` will be `nullptr`.
|
||||
TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
|
||||
ErrorReporter* error_reporter,
|
||||
BuiltinDataAllocator* allocator, void** builtin_data);
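//
// Example sketch of a call site, where `allocator` is any BuiltinDataAllocator
// implementation and `op`/`op_type` come from the flatbuffer model:
//
//   void* builtin_data = nullptr;
//   if (ParseOpData(op, op_type, error_reporter, &allocator,
//                   &builtin_data) != kTfLiteOk) {
//     // Handle the error; builtin_data is guaranteed to be nullptr here.
//   }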
|
||||
|
||||
// Converts the tensor data type used in the flat buffer to the representation
|
||||
// used by the runtime.
|
||||
TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
|
||||
ErrorReporter* error_reporter);
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_
62
code/lib/tfmicro/tensorflow/lite/core/api/op_resolver.cc
Normal file
@@ -0,0 +1,62 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/core/api/op_resolver.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
TfLiteStatus GetRegistrationFromOpCode(
|
||||
const OperatorCode* opcode, const OpResolver& op_resolver,
|
||||
ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
|
||||
TfLiteStatus status = kTfLiteOk;
|
||||
*registration = nullptr;
|
||||
auto builtin_code = opcode->builtin_code();
|
||||
int version = opcode->version();
|
||||
|
||||
if (builtin_code > BuiltinOperator_MAX ||
|
||||
builtin_code < BuiltinOperator_MIN) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter,
|
||||
"Op builtin_code out of range: %d. Are you using old TFLite binary "
|
||||
"with newer model?",
|
||||
builtin_code);
|
||||
status = kTfLiteError;
|
||||
} else if (builtin_code != BuiltinOperator_CUSTOM) {
|
||||
*registration = op_resolver.FindOp(builtin_code, version);
|
||||
if (*registration == nullptr) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter,
|
||||
"Didn't find op for builtin opcode '%s' version '%d'\n",
|
||||
EnumNameBuiltinOperator(builtin_code), version);
|
||||
status = kTfLiteError;
|
||||
}
|
||||
} else if (!opcode->custom_code()) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter,
|
||||
"Operator with CUSTOM builtin_code has no custom_code.\n");
|
||||
status = kTfLiteError;
|
||||
} else {
|
||||
const char* name = opcode->custom_code()->c_str();
|
||||
*registration = op_resolver.FindOp(name, version);
|
||||
if (*registration == nullptr) {
|
||||
// Do not report an error for an unresolved custom op; we do the final check
// while preparing ops.
|
||||
status = kTfLiteError;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace tflite
48
code/lib/tfmicro/tensorflow/lite/core/api/op_resolver.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/schema/schema_generated.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
/// Abstract interface that returns TfLiteRegistrations given op codes or custom
|
||||
/// op names. This is the mechanism by which ops referenced in the flatbuffer
/// model are mapped to executable function pointers (TfLiteRegistrations).
|
||||
class OpResolver {
|
||||
public:
|
||||
/// Finds the op registration for a builtin operator by enum code.
|
||||
virtual const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
|
||||
int version) const = 0;
|
||||
/// Finds the op registration of a custom operator by op name.
|
||||
virtual const TfLiteRegistration* FindOp(const char* op,
|
||||
int version) const = 0;
|
||||
virtual ~OpResolver() {}
|
||||
};
|
||||
|
||||
// Handles the logic for converting between an OperatorCode structure extracted
|
||||
// from a flatbuffer and information about a registered operator
|
||||
// implementation.
|
||||
TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
|
||||
const OpResolver& op_resolver,
|
||||
ErrorReporter* error_reporter,
|
||||
const TfLiteRegistration** registration);
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
|
||||
48
code/lib/tfmicro/tensorflow/lite/core/api/tensor_utils.cc
Normal file
48
code/lib/tfmicro/tensorflow/lite/core/api/tensor_utils.cc
Normal file
@@ -0,0 +1,48 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/core/api/tensor_utils.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
namespace tflite {
|
||||
|
||||
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {
|
||||
if (!tensor->is_variable) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
// TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it
|
||||
// to the value of the buffer.
|
||||
int value = 0;
|
||||
if (tensor->type == kTfLiteInt8) {
|
||||
value = tensor->params.zero_point;
|
||||
}
|
||||
// TODO(b/139446230): Provide a platform header to better handle these
|
||||
// specific scenarios.
|
||||
#if __ANDROID__ || defined(__x86_64__) || defined(__i386__) || \
|
||||
defined(__i386) || defined(__x86__) || defined(__X86__) || \
|
||||
defined(_X86_) || defined(_M_IX86) || defined(_M_X64)
|
||||
memset(tensor->data.raw, value, tensor->bytes);
|
||||
#else
|
||||
char* raw_ptr = tensor->data.raw;
|
||||
for (size_t i = 0; i < tensor->bytes; ++i) {
|
||||
*raw_ptr = value;
|
||||
raw_ptr++;
|
||||
}
|
||||
#endif
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
28
code/lib/tfmicro/tensorflow/lite/core/api/tensor_utils.h
Normal file
28
code/lib/tfmicro/tensorflow/lite/core/api/tensor_utils.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
|
||||
#define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Resets a variable tensor to the default value.
|
||||
TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor);
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
|
||||
937
code/lib/tfmicro/tensorflow/lite/kernels/internal/common.h
Normal file
937
code/lib/tfmicro/tensorflow/lite/kernels/internal/common.h
Normal file
@@ -0,0 +1,937 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
|
||||
|
||||
#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
|
||||
#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
|
||||
#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
constexpr int kReverseShift = -1;
|
||||
|
||||
inline void GetActivationMinMax(FusedActivationFunctionType ac,
|
||||
float* output_activation_min,
|
||||
float* output_activation_max) {
|
||||
switch (ac) {
|
||||
case FusedActivationFunctionType::kNone:
|
||||
*output_activation_min = std::numeric_limits<float>::lowest();
|
||||
*output_activation_max = std::numeric_limits<float>::max();
|
||||
break;
|
||||
case FusedActivationFunctionType::kRelu:
|
||||
*output_activation_min = 0.f;
|
||||
*output_activation_max = std::numeric_limits<float>::max();
|
||||
break;
|
||||
case FusedActivationFunctionType::kRelu1:
|
||||
*output_activation_min = -1.f;
|
||||
*output_activation_max = 1.f;
|
||||
break;
|
||||
case FusedActivationFunctionType::kRelu6:
|
||||
*output_activation_min = 0.f;
|
||||
*output_activation_max = 6.f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
inline float ActivationFunctionWithMinMax(float x, float output_activation_min,
|
||||
float output_activation_max) {
|
||||
return std::min(std::max(x, output_activation_min), output_activation_max);
|
||||
}
|
||||
|
||||
// Legacy function, left for compatibility only.
|
||||
template <FusedActivationFunctionType Ac>
|
||||
float ActivationFunction(float x) {
|
||||
float output_activation_min, output_activation_max;
|
||||
GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
|
||||
return ActivationFunctionWithMinMax(x, output_activation_min,
|
||||
output_activation_max);
|
||||
}
|
||||
|
||||
inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
|
||||
const float* bias_data, int array_size,
|
||||
float* array_data) {
|
||||
// Note: see b/132215220: in May 2019 we thought it would be OK to replace
|
||||
// this with the Eigen one-liner:
|
||||
// return (array.colwise() + bias).cwiseMin(clamp_max).cwiseMin(clamp_max).
|
||||
// This turned out to severely regress performance: +4ms (i.e. 8%) on
|
||||
// MobileNet v2 / 1.0 / 224. So we keep custom NEON code for now.
|
||||
TFLITE_DCHECK_EQ((array_size % bias_size), 0);
|
||||
#ifdef USE_NEON
|
||||
float* array_ptr = array_data;
|
||||
float* array_end_ptr = array_ptr + array_size;
|
||||
const auto clamp_min_vec = vdupq_n_f32(clamp_min);
|
||||
const auto clamp_max_vec = vdupq_n_f32(clamp_max);
|
||||
for (; array_ptr != array_end_ptr; array_ptr += bias_size) {
|
||||
int i = 0;
|
||||
for (; i <= bias_size - 16; i += 16) {
|
||||
auto b0 = vld1q_f32(bias_data + i);
|
||||
auto b1 = vld1q_f32(bias_data + i + 4);
|
||||
auto b2 = vld1q_f32(bias_data + i + 8);
|
||||
auto b3 = vld1q_f32(bias_data + i + 12);
|
||||
auto a0 = vld1q_f32(array_ptr + i);
|
||||
auto a1 = vld1q_f32(array_ptr + i + 4);
|
||||
auto a2 = vld1q_f32(array_ptr + i + 8);
|
||||
auto a3 = vld1q_f32(array_ptr + i + 12);
|
||||
auto x0 = vaddq_f32(a0, b0);
|
||||
auto x1 = vaddq_f32(a1, b1);
|
||||
auto x2 = vaddq_f32(a2, b2);
|
||||
auto x3 = vaddq_f32(a3, b3);
|
||||
x0 = vmaxq_f32(clamp_min_vec, x0);
|
||||
x1 = vmaxq_f32(clamp_min_vec, x1);
|
||||
x2 = vmaxq_f32(clamp_min_vec, x2);
|
||||
x3 = vmaxq_f32(clamp_min_vec, x3);
|
||||
x0 = vminq_f32(clamp_max_vec, x0);
|
||||
x1 = vminq_f32(clamp_max_vec, x1);
|
||||
x2 = vminq_f32(clamp_max_vec, x2);
|
||||
x3 = vminq_f32(clamp_max_vec, x3);
|
||||
vst1q_f32(array_ptr + i, x0);
|
||||
vst1q_f32(array_ptr + i + 4, x1);
|
||||
vst1q_f32(array_ptr + i + 8, x2);
|
||||
vst1q_f32(array_ptr + i + 12, x3);
|
||||
}
|
||||
for (; i <= bias_size - 4; i += 4) {
|
||||
auto b = vld1q_f32(bias_data + i);
|
||||
auto a = vld1q_f32(array_ptr + i);
|
||||
auto x = vaddq_f32(a, b);
|
||||
x = vmaxq_f32(clamp_min_vec, x);
|
||||
x = vminq_f32(clamp_max_vec, x);
|
||||
vst1q_f32(array_ptr + i, x);
|
||||
}
|
||||
for (; i < bias_size; i++) {
|
||||
array_ptr[i] = ActivationFunctionWithMinMax(array_ptr[i] + bias_data[i],
|
||||
clamp_min, clamp_max);
|
||||
}
|
||||
}
|
||||
#else // not NEON
|
||||
for (int array_offset = 0; array_offset < array_size;
|
||||
array_offset += bias_size) {
|
||||
for (int i = 0; i < bias_size; i++) {
|
||||
array_data[array_offset + i] = ActivationFunctionWithMinMax(
|
||||
array_data[array_offset + i] + bias_data[i], clamp_min, clamp_max);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int32 MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
int32 x, int32 quantized_multiplier, int left_shift) {
|
||||
using gemmlowp::RoundingDivideByPOT;
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
return RoundingDivideByPOT(
|
||||
SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
|
||||
}
|
||||
|
||||
inline int32 MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
int32 x, int32 quantized_multiplier, int left_shift) {
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
|
||||
quantized_multiplier);
|
||||
}
|
||||
|
||||
inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
|
||||
int shift) {
|
||||
using gemmlowp::RoundingDivideByPOT;
|
||||
using gemmlowp::SaturatingRoundingDoublingHighMul;
|
||||
int left_shift = shift > 0 ? shift : 0;
|
||||
int right_shift = shift > 0 ? 0 : -shift;
|
||||
return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
|
||||
x * (1 << left_shift), quantized_multiplier),
|
||||
right_shift);
|
||||
}
|
||||
|
||||
inline int32 MultiplyByQuantizedMultiplier(int64_t x,
|
||||
int32 quantized_multiplier,
|
||||
int shift) {
|
||||
// Inputs:
|
||||
// - quantized_multiplier has fixed point at bit 31
|
||||
// - shift is -31 to +7 (negative for right shift)
|
||||
//
|
||||
// Assumptions: The following input ranges are assumed
|
||||
// - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1)
|
||||
// - scaling is chosen so final scaled result fits in int32
|
||||
// - input x is in the range -(1<<47) <= x < (1<<47)
|
||||
assert(quantized_multiplier >= 0);
|
||||
assert(shift >= -31 && shift < 8);
|
||||
|
||||
int32_t reduced_multiplier = (quantized_multiplier + (1 << 15)) >> 16;
|
||||
int total_shift = 15 - shift;
|
||||
x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
|
||||
int32_t result = x >> total_shift;
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int CountLeadingZeros(T integer_input) {
|
||||
static_assert(std::is_unsigned<T>::value,
|
||||
"Only unsigned integer types handled.");
|
||||
#if defined(__GNUC__)
|
||||
return integer_input ? __builtin_clz(integer_input)
|
||||
: std::numeric_limits<T>::digits;
|
||||
#else
|
||||
if (integer_input == 0) {
|
||||
return std::numeric_limits<T>::digits;
|
||||
}
|
||||
|
||||
const T one_in_leading_positive = static_cast<T>(1)
|
||||
<< (std::numeric_limits<T>::digits - 1);
|
||||
int leading_zeros = 0;
|
||||
while (integer_input < one_in_leading_positive) {
|
||||
integer_input <<= 1;
|
||||
++leading_zeros;
|
||||
}
|
||||
return leading_zeros;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline int CountLeadingSignBits(T integer_input) {
|
||||
static_assert(std::is_signed<T>::value, "Only signed integer types handled.");
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
return integer_input ? __builtin_clrsb(integer_input)
|
||||
: std::numeric_limits<T>::digits;
|
||||
#else
|
||||
using U = typename std::make_unsigned<T>::type;
|
||||
return integer_input >= 0
|
||||
? CountLeadingZeros(static_cast<U>(integer_input)) - 1
|
||||
: integer_input != std::numeric_limits<T>::min()
|
||||
? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
|
||||
: 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Use "count leading zeros" helper functions to do a fast Floor(log_2(x)).
|
||||
template <typename Integer>
|
||||
inline Integer FloorLog2(Integer n) {
|
||||
static_assert(std::is_integral<Integer>::value, "");
|
||||
static_assert(std::is_signed<Integer>::value, "");
|
||||
static_assert(sizeof(Integer) == 4 || sizeof(Integer) == 8, "");
|
||||
TFLITE_CHECK_GT(n, 0);
|
||||
if (sizeof(Integer) == 4) {
|
||||
return 30 - CountLeadingSignBits(n);
|
||||
} else {
|
||||
return 62 - CountLeadingSignBits(n);
|
||||
}
|
||||
}
|
||||
|
||||
// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
|
||||
// softmax
|
||||
inline void gen_lut(const std::function<double(double)>& func, double min,
|
||||
double max, int16_t* table, const int num) {
|
||||
// size of table should equal to num + 1
|
||||
// last element only for slope calculation
|
||||
double step = (max - min) / (num - 1);
|
||||
double half_step = step / 2.0;
|
||||
for (int i = 0; i < num - 1; i++) {
|
||||
double sample_val = TfLiteRound(func(min + i * step) * 32768.0);
|
||||
double midpoint_interp_val =
|
||||
TfLiteRound((func(min + (i + 1) * step) * 32768.0 +
|
||||
TfLiteRound(func(min + i * step) * 32768.0)) /
|
||||
2.0);
|
||||
double midpoint_val =
|
||||
TfLiteRound(func(min + i * step + half_step) * 32768.0);
|
||||
double midpoint_err = midpoint_interp_val - midpoint_val;
|
||||
double bias = TfLiteRound(midpoint_err / 2.0);
|
||||
table[i] = std::min(std::max(sample_val - bias, -32768.0), 32767.0);
|
||||
}
|
||||
table[num - 1] =
|
||||
std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
|
||||
}
|
||||
|
||||
// int16 func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
|
||||
inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
|
||||
// 512 base value, lut[513] only for calculate slope
|
||||
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
|
||||
assert(index < 512 && "LUT index out of range.");
|
||||
int16_t offset = value & 0x7f;
|
||||
|
||||
// base and slope are Q0.15
|
||||
int16_t base = lut[index];
|
||||
int16_t slope = lut[index + 1] - lut[index];
|
||||
|
||||
// Q0.15 * Q0.7 = Q0.22
|
||||
// Round and convert from Q0.22 to Q0.15
|
||||
int32_t delta = (static_cast<int32_t>(slope) * offset + 64) >> 7;
|
||||
|
||||
// Q0.15 + Q0.15
|
||||
return base + delta;
|
||||
}
|
||||
|
||||
// Table of sigmoid(i/24) at 0.16 format - 256 elements.
|
||||
|
||||
// We use combined sigmoid and tanh look-up table, since
|
||||
// tanh(x) = 2*sigmoid(2*x) -1.
|
||||
// Both functions are symmetric, so the LUT table is only needed
|
||||
// for the absolute value of the input.
|
||||
static const uint16_t sigmoid_table_uint16[256] = {
|
||||
32768, 33451, 34133, 34813, 35493, 36169, 36843, 37513, 38180, 38841, 39498,
|
||||
40149, 40794, 41432, 42064, 42688, 43304, 43912, 44511, 45102, 45683, 46255,
|
||||
46817, 47369, 47911, 48443, 48964, 49475, 49975, 50464, 50942, 51409, 51865,
|
||||
52311, 52745, 53169, 53581, 53983, 54374, 54755, 55125, 55485, 55834, 56174,
|
||||
56503, 56823, 57133, 57433, 57724, 58007, 58280, 58544, 58800, 59048, 59288,
|
||||
59519, 59743, 59959, 60168, 60370, 60565, 60753, 60935, 61110, 61279, 61441,
|
||||
61599, 61750, 61896, 62036, 62172, 62302, 62428, 62549, 62666, 62778, 62886,
|
||||
62990, 63090, 63186, 63279, 63368, 63454, 63536, 63615, 63691, 63765, 63835,
|
||||
63903, 63968, 64030, 64090, 64148, 64204, 64257, 64308, 64357, 64405, 64450,
|
||||
64494, 64536, 64576, 64614, 64652, 64687, 64721, 64754, 64786, 64816, 64845,
|
||||
64873, 64900, 64926, 64950, 64974, 64997, 65019, 65039, 65060, 65079, 65097,
|
||||
65115, 65132, 65149, 65164, 65179, 65194, 65208, 65221, 65234, 65246, 65258,
|
||||
65269, 65280, 65291, 65301, 65310, 65319, 65328, 65337, 65345, 65352, 65360,
|
||||
65367, 65374, 65381, 65387, 65393, 65399, 65404, 65410, 65415, 65420, 65425,
|
||||
65429, 65433, 65438, 65442, 65445, 65449, 65453, 65456, 65459, 65462, 65465,
|
||||
65468, 65471, 65474, 65476, 65479, 65481, 65483, 65485, 65488, 65489, 65491,
|
||||
65493, 65495, 65497, 65498, 65500, 65501, 65503, 65504, 65505, 65507, 65508,
|
||||
65509, 65510, 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65517, 65518,
|
||||
65519, 65520, 65520, 65521, 65522, 65522, 65523, 65523, 65524, 65524, 65525,
|
||||
65525, 65526, 65526, 65526, 65527, 65527, 65528, 65528, 65528, 65529, 65529,
|
||||
65529, 65529, 65530, 65530, 65530, 65530, 65531, 65531, 65531, 65531, 65531,
|
||||
65532, 65532, 65532, 65532, 65532, 65532, 65533, 65533, 65533, 65533, 65533,
|
||||
65533, 65533, 65533, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
|
||||
65534, 65534, 65535};
|
||||
|
||||
// TODO(b/77858996): Add these to gemmlowp.
|
||||
template <typename IntegerType>
|
||||
IntegerType SaturatingAddNonGemmlowp(IntegerType a, IntegerType b) {
|
||||
static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
|
||||
return a;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline std::int32_t SaturatingAddNonGemmlowp(std::int32_t a, std::int32_t b) {
|
||||
std::int64_t a64 = a;
|
||||
std::int64_t b64 = b;
|
||||
std::int64_t sum = a64 + b64;
|
||||
return static_cast<std::int32_t>(std::min(
|
||||
static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
|
||||
std::max(
|
||||
static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
|
||||
sum)));
|
||||
}
|
||||
|
||||
template <typename tRawType, int tIntegerBits>
|
||||
gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingAddNonGemmlowp(
|
||||
gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
|
||||
gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
|
||||
return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
|
||||
SaturatingAddNonGemmlowp(a.raw(), b.raw()));
|
||||
}
|
||||
|
||||
template <typename IntegerType>
|
||||
IntegerType SaturatingSub(IntegerType a, IntegerType b) {
|
||||
static_assert(std::is_same<IntegerType, void>::value, "unimplemented");
|
||||
return a;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline std::int16_t SaturatingSub(std::int16_t a, std::int16_t b) {
|
||||
std::int32_t a32 = a;
|
||||
std::int32_t b32 = b;
|
||||
std::int32_t diff = a32 - b32;
|
||||
return static_cast<std::int16_t>(
|
||||
std::min(static_cast<int32_t>(32767),
|
||||
std::max(static_cast<int32_t>(-32768), diff)));
|
||||
}
|
||||
|
||||
template <>
|
||||
inline std::int32_t SaturatingSub(std::int32_t a, std::int32_t b) {
|
||||
std::int64_t a64 = a;
|
||||
std::int64_t b64 = b;
|
||||
std::int64_t diff = a64 - b64;
|
||||
return static_cast<std::int32_t>(std::min(
|
||||
static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::max()),
|
||||
std::max(
|
||||
static_cast<std::int64_t>(std::numeric_limits<std::int32_t>::min()),
|
||||
diff)));
|
||||
}
|
||||
|
||||
template <typename tRawType, int tIntegerBits>
|
||||
gemmlowp::FixedPoint<tRawType, tIntegerBits> SaturatingSub(
|
||||
gemmlowp::FixedPoint<tRawType, tIntegerBits> a,
|
||||
gemmlowp::FixedPoint<tRawType, tIntegerBits> b) {
|
||||
return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
|
||||
SaturatingSub(a.raw(), b.raw()));
|
||||
}
|
||||
// End section to be moved to gemmlowp.
|
||||
|
||||
template <typename IntegerType>
|
||||
IntegerType SaturatingRoundingMultiplyByPOTParam(IntegerType x, int exponent) {
|
||||
if (exponent == 0) {
|
||||
return x;
|
||||
}
|
||||
using ScalarIntegerType =
|
||||
typename gemmlowp::FixedPointRawTypeTraits<IntegerType>::ScalarRawType;
|
||||
const IntegerType min =
|
||||
gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::min());
|
||||
const IntegerType max =
|
||||
gemmlowp::Dup<IntegerType>(std::numeric_limits<ScalarIntegerType>::max());
|
||||
const int ScalarIntegerTypeBits = 8 * sizeof(ScalarIntegerType);
|
||||
|
||||
const std::int32_t threshold =
|
||||
((1 << (ScalarIntegerTypeBits - 1 - exponent)) - 1);
|
||||
const IntegerType positive_mask =
|
||||
gemmlowp::MaskIfGreaterThan(x, gemmlowp::Dup<IntegerType>(threshold));
|
||||
const IntegerType negative_mask =
|
||||
gemmlowp::MaskIfLessThan(x, gemmlowp::Dup<IntegerType>(-threshold));
|
||||
|
||||
IntegerType result = gemmlowp::ShiftLeft(x, exponent);
|
||||
result = gemmlowp::SelectUsingMask(positive_mask, max, result);
|
||||
result = gemmlowp::SelectUsingMask(negative_mask, min, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// If we want to leave IntegerBits fixed, then multiplication
|
||||
// by a power of two has to be saturating/rounding, not exact anymore.
|
||||
template <typename tRawType, int tIntegerBits>
|
||||
gemmlowp::FixedPoint<tRawType, tIntegerBits>
|
||||
SaturatingRoundingMultiplyByPOTParam(
|
||||
gemmlowp::FixedPoint<tRawType, tIntegerBits> a, int exponent) {
|
||||
return gemmlowp::FixedPoint<tRawType, tIntegerBits>::FromRaw(
|
||||
SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
|
||||
}
|
||||
|
||||
// Minimum output bits to accommodate log of maximum input range. It actually
|
||||
// does not matter if one considers, say, [-64,64] or [-64,64).
|
||||
//
|
||||
// For example, run this through Octave:
|
||||
// [0:127; ...
|
||||
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
|
||||
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
|
||||
constexpr int min_log_x_output_bits(int input_bits) {
|
||||
return input_bits > 90
|
||||
? 7
|
||||
: input_bits > 44
|
||||
? 6
|
||||
: input_bits > 21
|
||||
? 5
|
||||
: input_bits > 10
|
||||
? 4
|
||||
: input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1;
|
||||
}
|
||||
|
||||
// Although currently the name of this function says that it cannot handle
|
||||
// values less than 1, in practice it can handle as low as 1/x_max, where
|
||||
// x_max is the largest representable input. In other words, the output range
|
||||
// is symmetric.
|
||||
template <int OutputIntegerBits, int InputIntegerBits>
|
||||
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
|
||||
log_x_for_x_greater_than_or_equal_to_1_impl(
|
||||
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
|
||||
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32>::digits - 1);
|
||||
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32>::digits);
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
|
||||
// The reason for accumulating the result with an extra bit of headroom is
|
||||
// that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
|
||||
// recip_denom will otherwise introduce an error.
|
||||
static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
|
||||
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumIntegerBits>;
|
||||
|
||||
const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
|
||||
FixedPoint0, 1488522236, std::log(2.0));
|
||||
const FixedPoint0 sqrt_sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
|
||||
FixedPoint0, 1805811301, std::sqrt(std::sqrt(0.5)));
|
||||
const FixedPoint0 sqrt_half = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
|
||||
FixedPoint0, 1518500250, std::sqrt(0.5));
|
||||
const FixedPoint0 one_quarter =
|
||||
GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPoint0, 536870912, 1.0 / 4.0);
|
||||
|
||||
const FixedPoint0 alpha_n = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
|
||||
FixedPoint0, 117049297, 11.0 / 240.0 * std::sqrt(std::sqrt(2.0)));
|
||||
const FixedPoint0 alpha_d = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
|
||||
FixedPoint0, 127690142, 1.0 / 20.0 * std::sqrt(std::sqrt(2.0)));
|
||||
const FixedPoint0 alpha_i = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
|
||||
FixedPoint0, 1057819769,
|
||||
2.0 / std::sqrt(std::sqrt(2.0)) - std::sqrt(std::sqrt(2.0)));
|
||||
const FixedPoint0 alpha_f = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
|
||||
FixedPoint0, 638450708, 1.0 / 4.0 * std::sqrt(std::sqrt(2.0)));
|
||||
|
||||
const FixedPointAccum shifted_quarter =
|
||||
gemmlowp::Rescale<kAccumIntegerBits>(one_quarter);
|
||||
|
||||
// Reinterpret the input value as Q0.31, because we will figure out the
|
||||
// required shift "ourselves" instead of using, say, Rescale.
|
||||
FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
|
||||
// z_a_pow_2 = input_integer_bits - z_a_headroom;
|
||||
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
|
||||
FixedPoint0 r_a_tmp =
|
||||
SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
|
||||
const int32 r_a_raw =
|
||||
SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
|
||||
// z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
|
||||
// z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
|
||||
// InputIntegerBits - z_b_headroom - 0.25);
|
||||
const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
|
||||
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
|
||||
InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
|
||||
shifted_quarter);
|
||||
|
||||
// z_b is treated like z_a, but premultiplying by sqrt(0.5).
|
||||
FixedPoint0 z_b = z_a * sqrt_half;
|
||||
int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
|
||||
const int32 r_b_raw =
|
||||
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
|
||||
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
|
||||
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
|
||||
InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
|
||||
shifted_quarter);
|
||||
|
||||
const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
|
||||
const FixedPointAccum z_pow_2_adj = FixedPointAccum::FromRaw(
|
||||
std::max(z_a_pow_2_adj.raw(), z_b_pow_2_adj.raw()));
|
||||
|
||||
const FixedPoint0 p = gemmlowp::RoundingHalfSum(r, sqrt_sqrt_half);
|
||||
FixedPoint0 q = r - sqrt_sqrt_half;
|
||||
q = q + q;
|
||||
|
||||
const FixedPoint0 common_sq = q * q;
|
||||
const FixedPoint0 num = q * r + q * common_sq * alpha_n;
|
||||
const FixedPoint0 denom_minus_one_0 =
|
||||
p * (alpha_i + q + alpha_d * common_sq) + alpha_f * q;
|
||||
const FixedPoint0 recip_denom =
|
||||
one_over_one_plus_x_for_x_in_0_1(denom_minus_one_0);
|
||||
|
||||
const FixedPointAccum num_scaled = gemmlowp::Rescale<kAccumIntegerBits>(num);
|
||||
return gemmlowp::Rescale<OutputIntegerBits>(z_pow_2_adj * log_2 +
|
||||
num_scaled * recip_denom);
|
||||
}
|
||||
|
||||
template <int OutputIntegerBits, int InputIntegerBits>
|
||||
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
|
||||
log_x_for_x_greater_than_or_equal_to_1(
|
||||
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
|
||||
static_assert(
|
||||
OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
|
||||
"Output integer bits must be sufficient to accommodate logs of inputs.");
|
||||
return log_x_for_x_greater_than_or_equal_to_1_impl<OutputIntegerBits,
|
||||
InputIntegerBits>(
|
||||
input_val);
|
||||
}
|
||||
|
||||
inline int32 GetReciprocal(int32 x, int x_integer_digits,
|
||||
int* num_bits_over_unit) {
|
||||
int headroom_plus_one = CountLeadingZeros(static_cast<uint32>(x));
|
||||
// This is the number of bits to the left of the binary point above 1.0.
|
||||
// Consider x=1.25. In that case shifted_scale=0.8 and
|
||||
// no later adjustment will be needed.
|
||||
*num_bits_over_unit = x_integer_digits - headroom_plus_one;
|
||||
const int32 shifted_sum_minus_one =
|
||||
static_cast<int32>((static_cast<uint32>(x) << headroom_plus_one) -
|
||||
(static_cast<uint32>(1) << 31));
|
||||
|
||||
gemmlowp::FixedPoint<int32, 0> shifted_scale =
|
||||
gemmlowp::one_over_one_plus_x_for_x_in_0_1(
|
||||
gemmlowp::FixedPoint<int32, 0>::FromRaw(shifted_sum_minus_one));
|
||||
return shifted_scale.raw();
|
||||
}
|
||||
|
||||
inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
|
||||
int32* output_inv_sqrt,
|
||||
int* output_shift) {
|
||||
TFLITE_DCHECK_GE(input, 0);
|
||||
if (input <= 1) {
|
||||
// Handle the input value 1 separately to avoid overflow in that case
|
||||
// in the general computation below (b/143972021). Also handle 0 as if it
|
||||
// were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
|
||||
// but rare/unrealistic input value. We can expect both to occur in some
|
||||
// incompletely trained models, but probably not in fully trained models.
|
||||
*output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
|
||||
*output_shift = 0;
|
||||
return;
|
||||
}
|
||||
TFLITE_DCHECK_GT(input, 1);
|
||||
*output_shift = 11;
|
||||
while (input >= (1 << 29)) {
|
||||
input /= 4;
|
||||
++*output_shift;
|
||||
}
|
||||
const unsigned max_left_shift_bits =
|
||||
CountLeadingZeros(static_cast<uint32>(input)) - 1;
|
||||
const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
|
||||
const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
|
||||
*output_shift -= left_shift_bit_pairs;
|
||||
input <<= 2 * left_shift_bit_pairs;
|
||||
TFLITE_DCHECK_GE(input, (1 << 27));
|
||||
TFLITE_DCHECK_LT(input, (1 << 29));
|
||||
using gemmlowp::FixedPoint;
|
||||
using gemmlowp::Rescale;
|
||||
using gemmlowp::SaturatingRoundingMultiplyByPOT;
|
||||
// Using 3 integer bits gives us enough room for the internal arithmetic in
|
||||
// this Newton-Raphson iteration.
|
||||
using F3 = FixedPoint<int32, 3>;
|
||||
using F0 = FixedPoint<int32, 0>;
|
||||
const F3 fixedpoint_input = F3::FromRaw(input >> 1);
|
||||
const F3 fixedpoint_half_input =
|
||||
SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
|
||||
const F3 fixedpoint_half_three =
|
||||
GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
|
||||
// Newton-Raphson iteration
|
||||
// Naive unoptimized starting guess: x = 1
|
||||
F3 x = F3::One();
|
||||
// Naive unoptimized number of iterations: 5
|
||||
for (int i = 0; i < 5; i++) {
|
||||
const F3 x3 = Rescale<3>(x * x * x);
|
||||
x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
|
||||
}
|
||||
const F0 fixedpoint_half_sqrt_2 =
|
||||
GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
|
||||
x = x * fixedpoint_half_sqrt_2;
|
||||
*output_inv_sqrt = x.raw();
|
||||
if (*output_shift < 0) {
|
||||
*output_inv_sqrt <<= -*output_shift;
|
||||
*output_shift = 0;
|
||||
}
|
||||
// Convert right shift (right is positive) to left shift.
|
||||
*output_shift *= reverse_shift;
|
||||
}
|
||||
|
||||
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
|
||||
// BROADCASTING.
|
||||
//
|
||||
// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
|
||||
// rectangular array of numbers.
|
||||
//
|
||||
// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
|
||||
// However, as Dims<N> is to be deprecated, this class exists as an adaptor
|
||||
// to enable simple unoptimized implementations of element-wise broadcasting
|
||||
// operations.
|
||||
template <int N>
|
||||
struct NdArrayDesc {
|
||||
// The "extent" of each dimension. Indices along dimension d must be in the
|
||||
// half-open interval [0, extents[d]).
|
||||
int extents[N];
|
||||
|
||||
// The number of *elements* (not bytes) between consecutive indices of each
|
||||
// dimension.
|
||||
int strides[N];
|
||||
};
|
||||
|
||||
// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
|
||||
// BROADCASTING.
|
||||
//
|
||||
// Same as Offset(), except takes as NdArrayDesc<N> instead of Dims<N>.
|
||||
inline int SubscriptToIndex(const NdArrayDesc<4>& desc, int i0, int i1, int i2,
|
||||
int i3) {
|
||||
TFLITE_DCHECK(i0 >= 0 && i0 < desc.extents[0]);
|
||||
TFLITE_DCHECK(i1 >= 0 && i1 < desc.extents[1]);
|
||||
TFLITE_DCHECK(i2 >= 0 && i2 < desc.extents[2]);
|
||||
TFLITE_DCHECK(i3 >= 0 && i3 < desc.extents[3]);
|
||||
return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] +
|
||||
i3 * desc.strides[3];
|
||||
}
|
||||
|
||||
inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) {
|
||||
return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
|
||||
indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
|
||||
indexes[4] * desc.strides[4];
|
||||
}
|
||||
|
||||
// Given the dimensions of the operands for an element-wise binary broadcast,
|
||||
// adjusts them so that they can be directly iterated over with simple loops.
|
||||
// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and
|
||||
// 'desc1_out'. 'desc0_out' and 'desc1_out' cannot be nullptr.
|
||||
//
|
||||
// This function assumes that the two input shapes are compatible up to
|
||||
// broadcasting and the shorter one has already been prepended with 1s to be the
|
||||
// same length. E.g., if shape0 is (1, 16, 16, 64) and shape1 is (1, 64),
|
||||
// shape1 must already have been prepended to be (1, 1, 1, 64). Recall that
|
||||
// Dims<N> refer to shapes in reverse order. In this case, input0_dims will be
|
||||
// (64, 16, 16, 1) and input1_dims will be (64, 1, 1, 1).
|
||||
//
|
||||
// When two shapes are compatible up to broadcasting, for each dimension d,
|
||||
// the input extents are either equal, or one of them is 1.
|
||||
//
|
||||
// This function performs the following for each dimension d:
|
||||
// - If the extents are equal, then do nothing since the loop that walks over
|
||||
// both of the input arrays is correct.
|
||||
// - Otherwise, one (and only one) of the extents must be 1. Say extent0 is 1
|
||||
// and extent1 is e1. Then set extent0 to e1 and stride0 *to 0*. This allows
|
||||
// array0 to be referenced *at any index* in dimension d and still access the
|
||||
// same slice.
|
||||
template <int N>
|
||||
inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
|
||||
const Dims<N>& input1_dims,
|
||||
NdArrayDesc<N>* desc0_out,
|
||||
NdArrayDesc<N>* desc1_out) {
|
||||
TFLITE_DCHECK(desc0_out != nullptr);
|
||||
TFLITE_DCHECK(desc1_out != nullptr);
|
||||
|
||||
// Copy dims to desc.
|
||||
for (int i = 0; i < N; ++i) {
|
||||
desc0_out->extents[i] = input0_dims.sizes[i];
|
||||
desc0_out->strides[i] = input0_dims.strides[i];
|
||||
desc1_out->extents[i] = input1_dims.sizes[i];
|
||||
desc1_out->strides[i] = input1_dims.strides[i];
|
||||
}
|
||||
|
||||
// Walk over each dimension. If the extents are equal do nothing.
|
||||
// Otherwise, set the desc with extent 1 to have extent equal to the other and
|
||||
// stride 0.
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const int extent0 = ArraySize(input0_dims, i);
|
||||
const int extent1 = ArraySize(input1_dims, i);
|
||||
if (extent0 != extent1) {
|
||||
if (extent0 == 1) {
|
||||
desc0_out->strides[i] = 0;
|
||||
desc0_out->extents[i] = extent1;
|
||||
} else {
|
||||
TFLITE_DCHECK_EQ(extent1, 1);
|
||||
desc1_out->strides[i] = 0;
|
||||
desc1_out->extents[i] = extent0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copies dims to desc, calculating strides.
|
||||
template <int N>
|
||||
inline void CopyDimsToDesc(const RuntimeShape& input_shape,
|
||||
NdArrayDesc<N>* desc_out) {
|
||||
int desc_stride = 1;
|
||||
for (int i = N - 1; i >= 0; --i) {
|
||||
desc_out->extents[i] = input_shape.Dims(i);
|
||||
desc_out->strides[i] = desc_stride;
|
||||
desc_stride *= input_shape.Dims(i);
|
||||
}
|
||||
}
|
||||
|
||||
template <int N>
|
||||
inline void NdArrayDescsForElementwiseBroadcast(
|
||||
const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
|
||||
NdArrayDesc<N>* desc0_out, NdArrayDesc<N>* desc1_out) {
|
||||
TFLITE_DCHECK(desc0_out != nullptr);
|
||||
TFLITE_DCHECK(desc1_out != nullptr);
|
||||
|
||||
auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
|
||||
auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
|
||||
|
||||
// Copy dims to desc, calculating strides.
|
||||
CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
|
||||
CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
|
||||
|
||||
// Walk over each dimension. If the extents are equal do nothing.
|
||||
// Otherwise, set the desc with extent 1 to have extent equal to the other and
|
||||
// stride 0.
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const int extent0 = extended_input0_shape.Dims(i);
|
||||
const int extent1 = extended_input1_shape.Dims(i);
|
||||
if (extent0 != extent1) {
|
||||
if (extent0 == 1) {
|
||||
desc0_out->strides[i] = 0;
|
||||
desc0_out->extents[i] = extent1;
|
||||
} else {
|
||||
TFLITE_DCHECK_EQ(extent1, 1);
|
||||
desc1_out->strides[i] = 0;
|
||||
desc1_out->extents[i] = extent0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int N>
|
||||
inline void NdArrayDescsForElementwiseBroadcast(
|
||||
const RuntimeShape& input0_shape, const RuntimeShape& input1_shape,
|
||||
const RuntimeShape& input2_shape, NdArrayDesc<N>* desc0_out,
|
||||
NdArrayDesc<N>* desc1_out, NdArrayDesc<N>* desc2_out) {
|
||||
TFLITE_DCHECK(desc0_out != nullptr);
|
||||
TFLITE_DCHECK(desc1_out != nullptr);
|
||||
TFLITE_DCHECK(desc2_out != nullptr);
|
||||
|
||||
auto extended_input0_shape = RuntimeShape::ExtendedShape(N, input0_shape);
|
||||
auto extended_input1_shape = RuntimeShape::ExtendedShape(N, input1_shape);
|
||||
auto extended_input2_shape = RuntimeShape::ExtendedShape(N, input2_shape);
|
||||
|
||||
// Copy dims to desc, calculating strides.
|
||||
CopyDimsToDesc<N>(extended_input0_shape, desc0_out);
|
||||
CopyDimsToDesc<N>(extended_input1_shape, desc1_out);
|
||||
CopyDimsToDesc<N>(extended_input2_shape, desc2_out);
|
||||
|
||||
// Walk over each dimension. If the extents are equal do nothing.
|
||||
// Otherwise, set the desc with extent 1 to have extent equal to the other and
|
||||
// stride 0.
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const int extent0 = extended_input0_shape.Dims(i);
|
||||
const int extent1 = extended_input1_shape.Dims(i);
|
||||
const int extent2 = extended_input2_shape.Dims(i);
|
||||
|
||||
int extent = extent0;
|
||||
if (extent1 != 1) extent = extent1;
|
||||
if (extent2 != 1) extent = extent2;
|
||||
|
||||
TFLITE_DCHECK(extent0 == 1 || extent0 == extent);
|
||||
TFLITE_DCHECK(extent1 == 1 || extent1 == extent);
|
||||
TFLITE_DCHECK(extent2 == 1 || extent2 == extent);
|
||||
|
||||
if (!(extent0 == extent1 && extent1 == extent2)) {
|
||||
if (extent0 == 1) {
|
||||
desc0_out->strides[i] = 0;
|
||||
desc0_out->extents[i] = extent;
|
||||
}
|
||||
if (extent1 == 1) {
|
||||
desc1_out->strides[i] = 0;
|
||||
desc1_out->extents[i] = extent;
|
||||
}
|
||||
if (extent2 == 1) {
|
||||
desc2_out->strides[i] = 0;
|
||||
desc2_out->extents[i] = extent;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Detailed implementation of NDOpsHelper, the indexes must be a zero array.
|
||||
// This implementation is equivalent to N nested loops. Ex, if N=4, it can be
|
||||
// re-writen as:
|
||||
// for (int b = 0; b < output.extents[0]; ++b) {
|
||||
// for (int y = 0; y < output.extents[1]; ++y) {
|
||||
// for (int x = 0; x < output.extents[2]; ++x) {
|
||||
// for (int c = 0; c < output.extents[3]; ++c) {
|
||||
// calc({b,y,x,c});
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
template <int N, int DIM, typename Calc>
|
||||
typename std::enable_if<DIM != N - 1, void>::type NDOpsHelperImpl(
|
||||
const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
|
||||
for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
|
||||
NDOpsHelperImpl<N, DIM + 1, Calc>(output, calc, indexes);
|
||||
}
|
||||
}
|
||||
|
||||
template <int N, int DIM, typename Calc>
|
||||
typename std::enable_if<DIM == N - 1, void>::type NDOpsHelperImpl(
|
||||
const NdArrayDesc<N>& output, const Calc& calc, int indexes[N]) {
|
||||
for (indexes[DIM] = 0; indexes[DIM] < output.extents[DIM]; ++indexes[DIM]) {
|
||||
calc(indexes);
|
||||
}
|
||||
}
|
||||
|
||||
// Execute the calc function in the innermost iteration based on the shape of
|
||||
// the output. The calc function should take a single argument of type int[N].
|
||||
template <int N, typename Calc>
|
||||
inline void NDOpsHelper(const NdArrayDesc<N>& output, const Calc& calc) {
|
||||
int indexes[N] = {0};
|
||||
NDOpsHelperImpl<N, 0, Calc>(output, calc, indexes);
|
||||
}
|
||||
// Copied from gemmlowp::RoundDown when we dropped direct dependency on
|
||||
// gemmlowp.
|
||||
//
|
||||
// Returns the runtime argument rounded down to the nearest multiple of
|
||||
// the fixed Modulus.
|
||||
template <unsigned Modulus, typename Integer>
|
||||
Integer RoundDown(Integer i) {
|
||||
return i - (i % Modulus);
|
||||
}
|
||||
|
||||
// Copied from gemmlowp::RoundUp when we dropped direct dependency on
|
||||
// gemmlowp.
|
||||
//
|
||||
// Returns the runtime argument rounded up to the nearest multiple of
|
||||
// the fixed Modulus.
|
||||
template <unsigned Modulus, typename Integer>
|
||||
Integer RoundUp(Integer i) {
|
||||
return RoundDown<Modulus>(i + Modulus - 1);
|
||||
}
|
||||
|
||||
// Copied from gemmlowp::CeilQuotient when we dropped direct dependency on
|
||||
// gemmlowp.
|
||||
//
|
||||
// Returns the quotient a / b rounded up ('ceil') to the nearest integer.
|
||||
template <typename Integer>
|
||||
Integer CeilQuotient(Integer a, Integer b) {
|
||||
return (a + b - 1) / b;
|
||||
}
|
||||
|
||||
// This function is a copy of gemmlowp::HowManyThreads, copied when we dropped
|
||||
// the direct dependency of internal/optimized/ on gemmlowp.
|
||||
//
|
||||
// It computes a reasonable number of threads to use for a GEMM of shape
|
||||
// (rows, cols, depth).
|
||||
//
|
||||
// TODO(b/131910176): get rid of this function by switching each call site
|
||||
// to its own more sensible logic for its own workload.
|
||||
template <int KernelRows>
|
||||
inline int LegacyHowManyThreads(int max_num_threads, int rows, int cols,
|
||||
int depth) {
|
||||
// Early-exit in the default case where multi-threading is disabled.
|
||||
if (max_num_threads == 1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Ensure that each thread has KernelRows rows to process, if at all possible.
|
||||
int thread_count = std::min(max_num_threads, rows / KernelRows);
|
||||
|
||||
// Limit the number of threads according to the overall size of the problem.
|
||||
if (thread_count > 1) {
|
||||
// Empirically determined value.
|
||||
static constexpr std::uint64_t min_cubic_size_per_thread = 64 * 1024;
|
||||
|
||||
// We can only multiply two out of three sizes without risking overflow
|
||||
const std::uint64_t cubic_size =
|
||||
std::uint64_t(rows) * std::uint64_t(cols) * std::uint64_t(depth);
|
||||
|
||||
thread_count = std::min(
|
||||
thread_count, static_cast<int>(cubic_size / min_cubic_size_per_thread));
|
||||
}
|
||||
|
||||
if (thread_count < 1) {
|
||||
thread_count = 1;
|
||||
}
|
||||
|
||||
assert(thread_count > 0 && thread_count <= max_num_threads);
|
||||
return thread_count;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void optimized_ops_preload_l1_stream(const T* ptr) {
|
||||
#ifdef __GNUC__
|
||||
// builtin offered by GCC-compatible compilers including clang
|
||||
__builtin_prefetch(ptr, /* 0 means read */ 0, /* 0 means no locality */ 0);
|
||||
#else
|
||||
(void)ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void optimized_ops_preload_l1_keep(const T* ptr) {
|
||||
#ifdef __GNUC__
|
||||
// builtin offered by GCC-compatible compilers including clang
|
||||
__builtin_prefetch(ptr, /* 0 means read */ 0, /* 3 means high locality */ 3);
|
||||
#else
|
||||
(void)ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void optimized_ops_prefetch_write_l1_keep(const T* ptr) {
|
||||
#ifdef __GNUC__
|
||||
// builtin offered by GCC-compatible compilers including clang
|
||||
__builtin_prefetch(ptr, /* 1 means write */ 1, /* 3 means high locality */ 3);
|
||||
#else
|
||||
(void)ptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
|
||||
@@ -0,0 +1,110 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
|
||||
#ifndef TFLITE_DCHECK
|
||||
#define TFLITE_DCHECK(condition) (condition) ? (void)0 : TFLITE_ASSERT_FALSE
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_DCHECK_EQ
|
||||
#define TFLITE_DCHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ASSERT_FALSE
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_DCHECK_NE
|
||||
#define TFLITE_DCHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ASSERT_FALSE
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_DCHECK_GE
|
||||
#define TFLITE_DCHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_DCHECK_GT
|
||||
#define TFLITE_DCHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ASSERT_FALSE
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_DCHECK_LE
|
||||
#define TFLITE_DCHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ASSERT_FALSE
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_DCHECK_LT
|
||||
#define TFLITE_DCHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ASSERT_FALSE
|
||||
#endif
|
||||
|
||||
// TODO(ahentz): Clean up: We should stick to the DCHECK versions.
|
||||
#ifndef TFLITE_CHECK
|
||||
#define TFLITE_CHECK(condition) (condition) ? (void)0 : TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_CHECK_EQ
|
||||
#define TFLITE_CHECK_EQ(x, y) ((x) == (y)) ? (void)0 : TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_CHECK_NE
|
||||
#define TFLITE_CHECK_NE(x, y) ((x) != (y)) ? (void)0 : TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_CHECK_GE
|
||||
#define TFLITE_CHECK_GE(x, y) ((x) >= (y)) ? (void)0 : TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_CHECK_GT
|
||||
#define TFLITE_CHECK_GT(x, y) ((x) > (y)) ? (void)0 : TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_CHECK_LE
|
||||
#define TFLITE_CHECK_LE(x, y) ((x) <= (y)) ? (void)0 : TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_CHECK_LT
|
||||
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
// TODO(ahentz): Clean up.
|
||||
using int8 = std::int8_t;
|
||||
using uint8 = std::uint8_t;
|
||||
using int16 = std::int16_t;
|
||||
using uint16 = std::uint16_t;
|
||||
using int32 = std::int32_t;
|
||||
using uint32 = std::uint32_t;
|
||||
|
||||
// TFLITE_DEPRECATED()
|
||||
//
|
||||
// Duplicated from absl/base/macros.h to avoid pulling in that library.
|
||||
// Marks a deprecated class, struct, enum, function, method and variable
|
||||
// declarations. The macro argument is used as a custom diagnostic message (e.g.
|
||||
// suggestion of a better alternative).
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// class TFLITE_DEPRECATED("Use Bar instead") Foo {...};
|
||||
// TFLITE_DEPRECATED("Use Baz instead") void Bar() {...}
|
||||
//
|
||||
// Every usage of a deprecated entity will trigger a warning when compiled with
|
||||
// clang's `-Wdeprecated-declarations` option. This option is turned off by
|
||||
// default, but the warnings will be reported by clang-tidy.
|
||||
#if defined(__clang__) && __cplusplus >= 201103L
|
||||
#define TFLITE_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
#endif
|
||||
|
||||
#ifndef TFLITE_DEPRECATED
|
||||
#define TFLITE_DEPRECATED(message)
|
||||
#endif
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_COMPATIBILITY_H_
|
||||
39
code/lib/tfmicro/tensorflow/lite/kernels/internal/cppmath.h
Normal file
39
code/lib/tfmicro/tensorflow/lite/kernels/internal/cppmath.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace tflite {
|
||||
|
||||
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
|
||||
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO)
|
||||
#define TF_LITE_GLOBAL_STD_PREFIX
|
||||
#else
|
||||
#define TF_LITE_GLOBAL_STD_PREFIX std
|
||||
#endif
|
||||
|
||||
#define DECLARE_STD_GLOBAL_SWITCH1(tf_name, std_name) \
|
||||
template <class T> \
|
||||
inline T tf_name(const T x) { \
|
||||
return TF_LITE_GLOBAL_STD_PREFIX::std_name(x); \
|
||||
}
|
||||
|
||||
DECLARE_STD_GLOBAL_SWITCH1(TfLiteRound, round);
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_CPPMATH_H_
|
||||
@@ -0,0 +1,40 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
|
||||
|
||||
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
#define USE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
|
||||
#define USE_NEON
|
||||
#include "NEON_2_SSE.h"
|
||||
#endif
|
||||
|
||||
// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
|
||||
// defined, PortableSomeFunc(args) otherwise.
|
||||
#ifdef USE_NEON
|
||||
// Always use Neon code
|
||||
#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
|
||||
|
||||
#else
|
||||
// No NEON available: Use Portable code
|
||||
#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
|
||||
|
||||
#endif // defined(USE_NEON)
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
|
||||
@@ -0,0 +1,395 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace {
|
||||
// These constants are used to manipulate the binary representation of doubles.
|
||||
// Double-precision binary64 floating point format is:
|
||||
// Bit | 63 | 62-52 | 51-0 |
|
||||
// | Sign | Exponent | Fraction |
|
||||
// To avoid 64-bit integers as much as possible, I break this into high and
|
||||
// low 32-bit chunks. High is:
|
||||
// Bit | 31 | 30-20 | 19-0 |
|
||||
// | Sign | Exponent | High Fraction |
|
||||
// Low is:
|
||||
// Bit | 31-0 |
|
||||
// | Low Fraction |
|
||||
// We then access the components through logical bit-wise operations to
|
||||
// extract the parts needed, with the positions and masks derived from the
|
||||
// layout shown above.
|
||||
constexpr uint64_t kSignMask = 0x8000000000000000LL;
|
||||
constexpr uint64_t kExponentMask = 0x7ff0000000000000LL;
|
||||
constexpr int32_t kExponentShift = 52;
|
||||
constexpr int32_t kExponentBias = 1023;
|
||||
constexpr uint32_t kExponentIsBadNum = 0x7ff;
|
||||
constexpr uint64_t kFractionMask = 0x000fffffffc00000LL;
|
||||
constexpr uint32_t kFractionShift = 22;
|
||||
constexpr uint32_t kFractionRoundingMask = 0x003fffff;
|
||||
constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
|
||||
} // namespace

void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                        int* shift) {
  if (double_multiplier == 0.) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
#ifdef TFLITE_EMULATE_FLOAT
  // If we're trying to avoid the use of floating-point instructions (for
  // example on microcontrollers) then use an alternative implementation
  // that only requires integer and bitwise operations. To enable this, you
  // need to set the define during the build process for your platform.
  int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
#else   // TFLITE_EMULATE_FLOAT
  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1ll << 31)));
#endif  // TFLITE_EMULATE_FLOAT
  TFLITE_CHECK(q_fixed <= (1ll << 31));
  if (q_fixed == (1ll << 31)) {
    q_fixed /= 2;
    ++*shift;
  }
  TFLITE_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
  // A shift amount smaller than -31 would cause all bits to be shifted out
  // and thus all results would be zero. We implement that instead with
  // q_fixed==0, so as to avoid hitting issues with right-shift
  // operations with shift amounts greater than 31. Note that this happens
  // roughly when abs(double_multiplier) < 2^-31 and the present handling means
  // that we're effectively flushing tiny double_multiplier's to zero.
  // We could conceivably handle values in the range (roughly) [32, 63]
  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
  // the present handling is just doing 'flush denormals to zero'. We could
  // reconsider and actually generate nonzero denormals if a need arises.
  if (*shift < -31) {
    *shift = 0;
    q_fixed = 0;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}
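
// --- Illustrative usage sketch (editorial addition, not upstream TFLM code).
// Shows the significand/shift contract of QuantizeMultiplier(): on return,
// double_multiplier ~= quantized_multiplier * 2^(shift - 31). The 0.75 test
// value and the QUANTIZATION_UTIL_USAGE_SKETCH guard macro are hypothetical;
// the macro is never defined, so this sketch does not affect the build.
#ifdef QUANTIZATION_UTIL_USAGE_SKETCH
static void ExampleQuantizeMultiplierUsage() {
  int32_t significand = 0;
  int shift = 0;
  QuantizeMultiplier(0.75, &significand, &shift);
  // frexp(0.75) == 0.75 * 2^0, so shift == 0 and
  // significand == round(0.75 * 2^31) == 1610612736.
  TFLITE_CHECK_EQ(shift, 0);
  TFLITE_CHECK_EQ(significand, 1610612736);
}
#endif  // QUANTIZATION_UTIL_USAGE_SKETCH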

void QuantizeMultiplierGreaterThanOne(double double_multiplier,
                                      int32_t* quantized_multiplier,
                                      int* left_shift) {
  TFLITE_CHECK_GT(double_multiplier, 1.);
  QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift);
  TFLITE_CHECK_GE(*left_shift, 0);
}

void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
                                         int32_t* quantized_multiplier,
                                         int* left_shift) {
  TFLITE_CHECK_LT(double_multiplier, 1.);
  TFLITE_CHECK_GT(double_multiplier, 0.);
  int shift;
  QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
  TFLITE_CHECK_LE(shift, 0);
  *left_shift = shift;
}

int64_t IntegerFrExp(double input, int* shift) {
  // Make sure our assumptions about the double layout hold.
  TFLITE_CHECK_EQ(8, sizeof(double));

  // We want to access the bits of the input double value directly, which is
  // tricky to do safely, so use a union to handle the casting.
  union {
    double double_value;
    uint64_t double_as_uint;
  } cast_union;
  cast_union.double_value = input;
  const uint64_t u = cast_union.double_as_uint;

  // If the bitfield is all zeros apart from the sign bit, this is a normalized
  // zero value, so return standard values for this special case.
  if ((u & ~kSignMask) == 0) {
    *shift = 0;
    return 0;
  }

  // Deal with NaNs and Infs, which are always indicated with a fixed pattern in
  // the exponent, and distinguished by whether the fractions are zero or
  // non-zero.
  const uint32_t exponent_part = ((u & kExponentMask) >> kExponentShift);
  if (exponent_part == kExponentIsBadNum) {
    *shift = std::numeric_limits<int>::max();
    if (u & kFractionMask) {
      // NaN, so just return zero (with the exponent set to INT_MAX).
      return 0;
    } else {
      // Infinity, so return +/- INT_MAX.
      if (u & kSignMask) {
        return std::numeric_limits<int64_t>::min();
      } else {
        return std::numeric_limits<int64_t>::max();
      }
    }
  }

  // The shift is fairly easy to extract from the high bits of the double value,
  // just by masking it out and applying a bias. The std::frexp() implementation
  // always returns values between 0.5 and 1.0 though, whereas the exponent
  // assumes 1.0 to 2.0 is the standard range, so I add on one to match that
  // interface.
  *shift = (exponent_part - kExponentBias) + 1;

  // There's an implicit high bit in the double format definition, so make sure
  // we include that at the top, and then reconstruct the rest of the fractional
  // value from the remaining fragments.
  int64_t fraction = 0x40000000 + ((u & kFractionMask) >> kFractionShift);

  // We're cutting off some bits at the bottom, so to exactly match the standard
  // frexp implementation here we'll apply rounding by adding one to the least
  // significant bit of the result if the discarded portion is over half of the
  // maximum.
  if ((u & kFractionRoundingMask) > kFractionRoundingThreshold) {
    fraction += 1;
  }
  // Negate the fraction if the sign bit was set.
  if (u & kSignMask) {
    fraction *= -1;
  }

  return fraction;
}
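
// --- Illustrative sketch (editorial addition, not upstream TFLM code).
// Demonstrates the IntegerFrExp() contract described above: the returned
// fraction is scaled so that 0x40000000 represents 0.5, matching the
// [0.5, 1.0) range of std::frexp(). The guard macro is hypothetical and
// never defined, so this sketch does not affect the build.
#ifdef QUANTIZATION_UTIL_USAGE_SKETCH
static void ExampleIntegerFrExpUsage() {
  int shift = 0;
  const int64_t fraction = IntegerFrExp(1.0, &shift);
  // 1.0 == 0.5 * 2^1, so the fraction is 0x40000000 (i.e. 0.5) and shift is 1.
  TFLITE_CHECK_EQ(shift, 1);
  TFLITE_CHECK_EQ(fraction, 0x40000000);
}
#endif  // QUANTIZATION_UTIL_USAGE_SKETCH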

double DoubleFromFractionAndShift(int64_t fraction, int shift) {
  union {
    double double_value;
    uint64_t double_as_uint;
  } result;

  // Detect NaNs and infinities.
  if (shift == std::numeric_limits<int>::max()) {
    if (fraction == 0) {
      return std::numeric_limits<double>::quiet_NaN();
    } else if (fraction > 0) {
      return std::numeric_limits<double>::infinity();
    } else {
      return -std::numeric_limits<double>::infinity();
    }
  }

  // Return a normalized zero for a zero fraction.
  if (fraction == 0) {
    result.double_as_uint = 0;
    return result.double_value;
  }

  bool is_negative = (fraction < 0);
  int64_t encoded_fraction = is_negative ? -fraction : fraction;
  int64_t encoded_shift = (shift - 1);
  while (encoded_fraction < 0x40000000) {
    encoded_fraction *= 2;
    encoded_shift -= 1;
  }
  while (encoded_fraction > 0x80000000) {
    encoded_fraction /= 2;
    encoded_shift += 1;
  }
  encoded_fraction -= 0x40000000;
  if (encoded_shift < -1022) {
    encoded_shift = -1023;
  } else if (encoded_shift > 1022) {
    encoded_shift = 1023;
  }
  encoded_shift += kExponentBias;
  uint64_t encoded_sign = is_negative ? kSignMask : 0;
  result.double_as_uint = encoded_sign | (encoded_shift << kExponentShift) |
                          (encoded_fraction << kFractionShift);
  return result.double_value;
}

double IntegerDoubleMultiply(double a, double b) {
  int a_shift;
  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
  int b_shift;
  const int64_t b_fraction = IntegerFrExp(b, &b_shift);
  // Detect NaNs and infinities.
  if (a_shift == std::numeric_limits<int>::max() ||
      (b_shift == std::numeric_limits<int>::max())) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  const int result_shift = a_shift + b_shift + 1;
  const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
  return DoubleFromFractionAndShift(result_fraction, result_shift);
}

int IntegerDoubleCompare(double a, double b) {
  int a_shift;
  const int64_t a_fraction = IntegerFrExp(a, &a_shift);
  int b_shift;
  const int64_t b_fraction = IntegerFrExp(b, &b_shift);

  // Detect NaNs and infinities.
  if (a_shift == std::numeric_limits<int>::max() ||
      (b_shift == std::numeric_limits<int>::max())) {
    return 1;
  }

  if ((a_fraction == 0) && (b_fraction < 0)) {
    return 1;
  } else if ((a_fraction < 0) && (b_fraction == 0)) {
    return -1;
  } else if (a_shift < b_shift) {
    return -1;
  } else if (a_shift > b_shift) {
    return 1;
  } else if (a_fraction < b_fraction) {
    return -1;
  } else if (a_fraction > b_fraction) {
    return 1;
  } else {
    return 0;
  }
}

void PreprocessSoftmaxScaling(double beta, double input_scale,
                              int input_integer_bits,
                              int32_t* quantized_multiplier, int* left_shift) {
  // If the overall multiplier (input and beta) is large, then exp() of an
  // input difference of 1 scaled by this will be large. In other words, we
  // can cap the multiplier and know that, when it is used, the output will be
  // (round to) zero wherever the input is not at the maximum value.

  // If the overall scale is less than one, and input_integer_bits=0, then the
  // result is double equivalent of Q0.31 (actually with more precision). Thus
  // this generates a Q(input_integer_bits).(31-input_integer_bits)
  // representation.
#ifdef TFLITE_EMULATE_FLOAT
  const double input_beta = IntegerDoubleMultiply(beta, input_scale);
  int shift;
  int64_t fraction = IntegerFrExp(input_beta, &shift);
  shift += (31 - input_integer_bits);
  double input_beta_real_multiplier =
      DoubleFromFractionAndShift(fraction, shift);
  if (IntegerDoubleCompare(input_beta_real_multiplier, (1ll << 31) - 1.0) > 0) {
    input_beta_real_multiplier = (1ll << 31) - 1.0;
  }
#else   // TFLITE_EMULATE_FLOAT
  const double input_beta_real_multiplier = std::min(
      beta * input_scale * (1 << (31 - input_integer_bits)), (1ll << 31) - 1.0);
#endif  // TFLITE_EMULATE_FLOAT

  QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
                                   quantized_multiplier, left_shift);
}

void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
                                    int input_integer_bits,
                                    int32_t* quantized_multiplier,
                                    int* left_shift,
                                    int32_t* reverse_scaling_divisor,
                                    int* reverse_scaling_left_shift) {
  PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
                           quantized_multiplier, left_shift);

  // Also calculate what amounts to the inverse scaling factor for the input.
  const double real_reverse_scaling_divisor =
      (1 << (31 - *left_shift)) / static_cast<double>(*quantized_multiplier);
  tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor,
                                              reverse_scaling_divisor,
                                              reverse_scaling_left_shift);
}

int CalculateInputRadius(int input_integer_bits, int input_left_shift,
                         int total_signed_bits) {
#ifdef TFLITE_EMULATE_FLOAT
  int64_t result = (1 << input_integer_bits) - 1;
  result <<= (total_signed_bits - input_integer_bits);
  result >>= input_left_shift;
  return result;
#else   // TFLITE_EMULATE_FLOAT
  const double max_input_rescaled =
      1.0 * ((1 << input_integer_bits) - 1) *
      (1ll << (total_signed_bits - input_integer_bits)) /
      (1ll << input_left_shift);
  // Tighten bound using floor. Suppose that we could use the exact value.
  // After scaling the difference, the result would be at the maximum. Thus we
  // must ensure that our value has lower magnitude.
  return static_cast<int>(std::floor(max_input_rescaled));
#endif  // TFLITE_EMULATE_FLOAT
}

void NudgeQuantizationRange(const float min, const float max,
                            const int quant_min, const int quant_max,
                            float* nudged_min, float* nudged_max,
                            float* nudged_scale) {
  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
  const float quant_min_float = static_cast<float>(quant_min);
  const float quant_max_float = static_cast<float>(quant_max);
  *nudged_scale = (max - min) / (quant_max_float - quant_min_float);
  const float zero_point_from_min = quant_min_float - min / *nudged_scale;
  uint16 nudged_zero_point;
  if (zero_point_from_min < quant_min_float) {
    nudged_zero_point = static_cast<uint16>(quant_min);
  } else if (zero_point_from_min > quant_max_float) {
    nudged_zero_point = static_cast<uint16>(quant_max);
  } else {
    nudged_zero_point = static_cast<uint16>(TfLiteRound(zero_point_from_min));
  }
  *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
  *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);
}

void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
                       const float nudged_max, const float* input_data,
                       float* output_data, const float size) {
  // This code originates from tensorflow/core/kernels/fake_quant_ops_functor.h.
  const float inv_nudged_scale = 1.0f / nudged_scale;

  for (int i = 0; i < size; i++) {
    const float src_val = input_data[i];
    const float clamped = std::min(nudged_max, std::max(nudged_min, src_val));
    const float clamped_shifted = clamped - nudged_min;
    const float dst_val =
        TfLiteRound(clamped_shifted * inv_nudged_scale) * nudged_scale +
        nudged_min;
    output_data[i] = dst_val;
  }
}

bool CheckedLog2(const float x, int* log2_result) {
  // Using TfLiteRound instead of std::round and std::log instead of
  // std::log2 to work around these functions being missing in a toolchain
  // used in some TensorFlow tests as of May 2018.
  const float x_log2 = std::log(x) * (1.0f / std::log(2.0f));
  const float x_log2_rounded = TfLiteRound(x_log2);
  const float x_log2_fracpart = x_log2 - x_log2_rounded;

  *log2_result = static_cast<int>(x_log2_rounded);
  return std::abs(x_log2_fracpart) < 1e-3f;
}

void QuantizeMultiplierArray(const double* effective_scales, size_t size,
                             int32_t* effective_scale_significand,
                             int* effective_shift) {
  for (size_t i = 0; i < size; ++i) {
    QuantizeMultiplier(effective_scales[i], &effective_scale_significand[i],
                       &effective_shift[i]);
  }
}

}  // namespace tflite
@@ -0,0 +1,292 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_

#include <cmath>
#include <cstdint>
#include <limits>

#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

// Given the min and max values of a float array, return
// reasonable quantization parameters to use for this array.
template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax,
                                            bool narrow_range) {
  const T qmin = std::numeric_limits<T>::min() + (narrow_range ? 1 : 0);
  const T qmax = std::numeric_limits<T>::max();
  const double qmin_double = qmin;
  const double qmax_double = qmax;
  // 0 should always be a representable value. Let's assume that the initial
  // min,max range contains 0.
  TFLITE_CHECK_LE(rmin, 0.);
  TFLITE_CHECK_GE(rmax, 0.);
  if (rmin == rmax) {
    // Special case where the min,max range is a point. Should be {0}.
    TFLITE_CHECK_EQ(rmin, 0.);
    TFLITE_CHECK_EQ(rmax, 0.);
    QuantizationParams quantization_params;
    quantization_params.zero_point = 0;
    quantization_params.scale = 0.;
    return quantization_params;
  }

  // General case.
  //
  // First determine the scale.
  const double scale = (rmax - rmin) / (qmax_double - qmin_double);

  // Zero-point computation.
  // First the initial floating-point computation. The zero-point can be
  // determined from solving an affine equation for any known pair
  // (real value, corresponding quantized value).
  // We know two such pairs: (rmin, qmin) and (rmax, qmax).
  // The arithmetic error on the zero point computed from either pair
  // will be roughly machine_epsilon * (sum of absolute values of terms)
  // so we want to use the variant that adds the smaller terms.
  const double zero_point_from_min = qmin_double - rmin / scale;
  const double zero_point_from_max = qmax_double - rmax / scale;
  const double zero_point_from_min_error =
      std::abs(qmin_double) + std::abs(rmin / scale);
  const double zero_point_from_max_error =
      std::abs(qmax_double) + std::abs(rmax / scale);

  const double zero_point_double =
      zero_point_from_min_error < zero_point_from_max_error
          ? zero_point_from_min
          : zero_point_from_max;

  // Now we need to nudge the zero point to be an integer
  // (our zero points are integer, and this is motivated by the requirement
  // to be able to represent the real value "0" exactly as a quantized value,
  // which is required in multiple places, for example in Im2col with SAME
  // padding).
  T nudged_zero_point = 0;
  if (zero_point_double < qmin_double) {
    nudged_zero_point = qmin;
  } else if (zero_point_double > qmax_double) {
    nudged_zero_point = qmax;
  } else {
    nudged_zero_point = static_cast<T>(round(zero_point_double));
  }
  // The zero point should always be in the range of quantized value,
  // [qmin, qmax].
  TFLITE_CHECK_GE(nudged_zero_point, qmin);
  TFLITE_CHECK_LE(nudged_zero_point, qmax);

  // Finally, store the result nudged quantization params.
  QuantizationParams quantization_params;
  quantization_params.zero_point = nudged_zero_point;
  quantization_params.scale = scale;
  return quantization_params;
}
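
// --- Illustrative sketch (editorial addition, not part of the upstream
// header). Shows the affine mapping produced by ChooseQuantizationParams():
// real_value ~= scale * (quantized_value - zero_point). The [0.0, 1.0] range
// and uint8_t target are assumed purely for illustration, and the guard
// macro is hypothetical (never defined), so the sketch does not affect the
// build.
#ifdef QUANTIZATION_UTIL_USAGE_SKETCH
inline void ExampleChooseQuantizationParams() {
  const QuantizationParams params =
      ChooseQuantizationParams<uint8_t>(0.0, 1.0, /*narrow_range=*/false);
  // scale == (1.0 - 0.0) / (255 - 0) and real 0.0 maps exactly to the
  // quantized value 0, so zero_point == 0.
  TFLITE_CHECK_EQ(params.zero_point, 0);
}
#endif  // QUANTIZATION_UTIL_USAGE_SKETCH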

template <typename T>
QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
  return ChooseQuantizationParams<T>(rmin, rmax, false);
}

// Converts a floating-point number to an integer. For all inputs x where
// static_cast<IntOut>(x) is legal according to the C++ standard, the result
// is identical to that cast (i.e. the result is x with its fractional part
// truncated whenever that is representable as IntOut).
//
// static_cast would cause undefined behavior for the following cases, which
// have well-defined behavior for this function:
//
// 1. If x is NaN, the result is zero.
//
// 2. If the truncated form of x is above the representable range of IntOut,
//    the result is std::numeric_limits<IntOut>::max().
//
// 3. If the truncated form of x is below the representable range of IntOut,
//    the result is std::numeric_limits<IntOut>::min().
//
// Note that cases #2 and #3 cover infinities as well as finite numbers.
//
// The range of FloatIn must include the range of IntOut, otherwise
// the results are undefined.
// TODO(sfeuz): Replace by absl::SafeCast once available.
template <class IntOut, class FloatIn>
IntOut SafeCast(FloatIn x) {
  static_assert(!std::numeric_limits<FloatIn>::is_integer,
                "FloatIn is integer");
  static_assert(std::numeric_limits<IntOut>::is_integer,
                "IntOut is not integer");
  static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");

  // Special case NaN, for which the logic below doesn't work.
  if (std::isnan(x)) {
    return 0;
  }

  // Negative values all clip to zero for unsigned results.
  if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
    return 0;
  }

  // Handle infinities.
  if (std::isinf(x)) {
    return x < 0 ? std::numeric_limits<IntOut>::min()
                 : std::numeric_limits<IntOut>::max();
  }

  // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
  // unless x is zero in which case exp == 0. Note that this implies that the
  // magnitude of x is strictly less than 2^exp.
  int exp = 0;
  std::frexp(x, &exp);

  // Let N be the number of non-sign bits in the representation of IntOut. If
  // the magnitude of x is strictly less than 2^N, the truncated version of x
  // is representable as IntOut. The only representable integer for which this
  // is not the case is kMin for signed types (i.e. -2^N), but that is covered
  // by the fall-through below.
  if (exp <= std::numeric_limits<IntOut>::digits) {
    return x;
  }

  // Handle numbers with magnitude >= 2^N.
  return x < 0 ? std::numeric_limits<IntOut>::min()
               : std::numeric_limits<IntOut>::max();
}
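
// --- Illustrative sketch (editorial addition, not part of the upstream
// header). Demonstrates the truncating/saturating behaviour documented
// above; the numeric test values are assumed purely for illustration, and
// the guard macro is hypothetical (never defined).
#ifdef QUANTIZATION_UTIL_USAGE_SKETCH
inline void ExampleSafeCastUsage() {
  TFLITE_CHECK_EQ(SafeCast<int32_t>(3.9f), 3);      // Truncates toward zero.
  TFLITE_CHECK_EQ(SafeCast<int8_t>(1000.0f), 127);  // Saturates at the max.
  TFLITE_CHECK_EQ(SafeCast<uint8_t>(-5.0f), 0);     // Negative clips to zero.
}
#endif  // QUANTIZATION_UTIL_USAGE_SKETCH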

// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of NEGATIVE its exponent ---
// this is intended as a RIGHT-shift.
//
// Restricted to the case where the multiplier < 1 (and non-negative).
void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
                                         int32_t* quantized_multiplier,
                                         int* left_shift);

// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Restricted to the case where the multiplier > 1.
void QuantizeMultiplierGreaterThanOne(double double_multiplier,
                                      int32_t* quantized_multiplier,
                                      int* left_shift);

// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
// Handles an arbitrary positive multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                        int* shift);

// Splits a double input value into a returned fraction, and a shift value from
// the exponent, using only bitwise and integer operations to support
// microcontrollers and other environments without floating-point support.
//
// This is designed to be a replacement for how std::frexp() is used within the
// QuantizeMultiplier() function, and so has a different signature than the
// standard version, returning a 64-bit integer rather than a double. This
// result has a maximum value of 1<<31, with the fraction expressed as a
// proportion of that maximum.
//
// std::frexp() returns NaNs and infinities unmodified, but since we're
// returning integers that can't represent those values, instead we return
// a shift of std::numeric_limits<int>::max() for all bad numbers, with an int64
// result of 0 for NaNs, std::numeric_limits<int64_t>::max() for +INFINITY, and
// std::numeric_limits<int64_t>::min() for -INFINITY. Denormalized inputs will
// result in return values that end up truncating some bits at the end,
// reflecting the loss of precision inherent in denormalization.
int64_t IntegerFrExp(double input, int* shift);

// Converts an integer fraction in the format produced by IntegerFrExp (where
// 0x40000000 is 1.0) and an exponent shift (between -1022 and +1022) into an
// IEEE binary64 double format result. The implementation uses only integer and
// bitwise operators, so no floating point hardware support or emulation is
// needed. This is here so quantized operations can run non-time-critical
// preparation calculations on microcontrollers and other platforms without
// float support.
double DoubleFromFractionAndShift(int64_t fraction, int shift);

// Performs a multiplication of two numbers in double format, using only integer
// and bitwise instructions. This is aimed at supporting housekeeping functions
// for quantized operations on microcontrollers without floating-point hardware.
double IntegerDoubleMultiply(double a, double b);

// Returns -1 if a is less than b, 0 if a and b are equal, and +1 if a is
// greater than b. It is implemented using only integer and logical instructions
// so that it can be easily run on microcontrollers for quantized operations.
int IntegerDoubleCompare(double a, double b);

// This first creates a multiplier in a double equivalent of
// Q(input_integer_bits).(31-input_integer_bits) representation, with extra
// precision in the double's fractional bits. It then splits the result into
// significand and exponent.
void PreprocessSoftmaxScaling(double beta, double input_scale,
                              int input_integer_bits,
                              int32_t* quantized_multiplier, int* left_shift);
// Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
                                    int input_integer_bits,
                                    int32_t* quantized_multiplier,
                                    int* left_shift,
                                    int32_t* reverse_scaling_divisor,
                                    int* reverse_scaling_left_shift);
// Calculate the largest input that will result in a within-bounds intermediate
// result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words,
// it must not overflow before we reduce the value by multiplication by the
// input multiplier. The negative radius is used as the minimum difference in
// Softmax.
int CalculateInputRadius(int input_integer_bits, int input_left_shift,
                         int total_signed_bits = 31);

// Nudges a min/max quantization range to ensure zero is zero.
// Gymnastics with nudged zero point is to ensure that real zero maps to
// an integer, which is required for e.g. zero-padding in convolutional layers.
// Outputs nudged_min, nudged_max, nudged_scale.
void NudgeQuantizationRange(const float min, const float max,
                            const int quant_min, const int quant_max,
                            float* nudged_min, float* nudged_max,
                            float* nudged_scale);

// Fake quantizes (quantizes and dequantizes) input_data using the scale,
// nudged_min, and nudged_max from NudgeQuantizationRange. This matches the code
// in TensorFlow's FakeQuantizeWithMinMaxVarsFunctor.
void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
                       const float nudged_max, const float* input_data,
                       float* output_data, const float size);

// If x is approximately a power of two (with any positive or negative
// exponent), stores that exponent (i.e. log2(x)) in *log2_result, otherwise
// returns false.
bool CheckedLog2(const float x, int* log2_result);

// Decomposes an array of double multipliers into a Q0.31 int32 representation
// of its significand, and shift representation of its exponent.
//
// Handles an arbitrary multiplier. The 'shift' output-value is
// basically the 'floating-point exponent' of the multiplier:
// Negative for a right-shift (when the multiplier is <1), positive for a
// left-shift (when the multiplier is >1)
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
                             int32_t* effective_scale_significand,
                             int* effective_shift);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_
@@ -0,0 +1,419 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {

namespace reference_ops {

template <typename T>
inline void Add(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const T* input1_data,
                const RuntimeShape& input2_shape, const T* input2_data,
                const RuntimeShape& output_shape, T* output_data) {
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    output_data[i] = ActivationFunctionWithMinMax(
        input1_data[i] + input2_data[i], params.quantized_activation_min,
        params.quantized_activation_max);
  }
}

inline void Add(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const float* input1_data,
                const RuntimeShape& input2_shape, const float* input2_data,
                const RuntimeShape& output_shape, float* output_data) {
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);
  for (int i = 0; i < flat_size; i++) {
    auto x = input1_data[i] + input2_data[i];
    output_data[i] = ActivationFunctionWithMinMax(
        x, params.float_activation_min, params.float_activation_max);
  }
}
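
// --- Illustrative sketch (editorial addition, not upstream TFLM code).
// A minimal call of the float Add() above on two length-4 buffers. The
// shapes, data values, wide activation bounds and the guard macro are all
// assumed purely for illustration; RuntimeShape is brace-initialized as in
// upstream TFLite.
#ifdef REFERENCE_ADD_USAGE_SKETCH
inline void ExampleFloatAdd() {
  const RuntimeShape shape({1, 1, 1, 4});
  const float input1[4] = {1.f, 2.f, 3.f, 4.f};
  const float input2[4] = {10.f, 20.f, 30.f, 40.f};
  float output[4] = {};
  ArithmeticParams params;
  params.float_activation_min = -1e30f;  // Effectively no clamping.
  params.float_activation_max = 1e30f;
  Add(params, shape, input1, shape, input2, shape, output);
  // output is now {11, 22, 33, 44}.
}
#endif  // REFERENCE_ADD_USAGE_SKETCH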

// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.
inline void AddElementwise(int size, const ArithmeticParams& params,
                           const uint8* input1_data, const uint8* input2_data,
                           uint8* output_data) {
  TFLITE_DCHECK_GT(params.input1_offset, -256);
  TFLITE_DCHECK_GT(params.input2_offset, -256);
  TFLITE_DCHECK_LT(params.input1_offset, 256);
  TFLITE_DCHECK_LT(params.input2_offset, 256);

  for (int i = 0; i < size; ++i) {
    const int32 input1_val = params.input1_offset + input1_data[i];
    const int32 input2_val = params.input2_offset + input2_data[i];
    const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
    const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
    const int32 scaled_input1_val =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_input1_val, params.input1_multiplier, params.input1_shift);
    const int32 scaled_input2_val =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_input2_val, params.input2_multiplier, params.input2_shift);
    const int32 raw_sum = scaled_input1_val + scaled_input2_val;
    const int32 raw_output =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            raw_sum, params.output_multiplier, params.output_shift) +
        params.output_offset;
    const int32 clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, raw_output));
    output_data[i] = static_cast<uint8>(clamped_output);
  }
}
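
// --- Editorial note (not upstream TFLM code): a short recap of the fixed-
// point pipeline above. Each input is re-centred by its offset, shifted left
// by params.left_shift to gain integer headroom, rescaled onto a common scale
// via its (multiplier, shift) pair, summed, rescaled onto the output scale,
// re-offset, and finally clamped to the activation range. The
// (multiplier, shift) pairs are expected to come from QuantizeMultiplier()
// style decompositions of the corresponding real-valued scale ratios.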

// Scalar-broadcast add that can be used for inner loop of more general
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
                               uint8 input1_data, const uint8* input2_data,
                               uint8* output_data) {
  TFLITE_DCHECK_GT(params.input1_offset, -256);
  TFLITE_DCHECK_GT(params.input2_offset, -256);
  TFLITE_DCHECK_LT(params.input1_offset, 256);
  TFLITE_DCHECK_LT(params.input2_offset, 256);

  const int32 input1_val = params.input1_offset + input1_data;
  const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
  const int32 scaled_input1_val =
      MultiplyByQuantizedMultiplierSmallerThanOneExp(
          shifted_input1_val, params.input1_multiplier, params.input1_shift);
  for (int i = 0; i < size; ++i) {
    const int32 input2_val = params.input2_offset + input2_data[i];
    const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
    const int32 scaled_input2_val =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_input2_val, params.input2_multiplier, params.input2_shift);
    const int32 raw_sum = scaled_input1_val + scaled_input2_val;
    const int32 raw_output =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            raw_sum, params.output_multiplier, params.output_shift) +
        params.output_offset;
    const int32 clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, raw_output));
    output_data[i] = static_cast<uint8>(clamped_output);
  }
}

inline void Add(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const uint8* input1_data,
                const RuntimeShape& input2_shape, const uint8* input2_data,
                const RuntimeShape& output_shape, uint8* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);

  TFLITE_DCHECK_GT(params.input1_offset, -256);
  TFLITE_DCHECK_GT(params.input2_offset, -256);
  TFLITE_DCHECK_LT(params.input1_offset, 256);
  TFLITE_DCHECK_LT(params.input2_offset, 256);
  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void Add(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const int16* input1_data,
                const RuntimeShape& input2_shape, const int16* input2_data,
                const RuntimeShape& output_shape, int16* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);

  const int input1_shift = params.input1_shift;
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);
  const int16 output_activation_min = params.quantized_activation_min;
  const int16 output_activation_max = params.quantized_activation_max;

  TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
  TFLITE_DCHECK_LE(input1_shift, 0);
  TFLITE_DCHECK_LE(params.input2_shift, 0);
  const int16* not_shift_input = input1_shift == 0 ? input1_data : input2_data;
  const int16* shift_input = input1_shift == 0 ? input2_data : input1_data;
  const int input_right_shift =
      input1_shift == 0 ? -params.input2_shift : -input1_shift;

  for (int i = 0; i < flat_size; i++) {
    // F0 uses 0 integer bits, range [-1, 1].
    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;

    F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
    F0 scaled_input = F0::FromRaw(
        gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
    F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
    const int16 raw_output = result.raw();
    const int16 clamped_output = std::min(
        output_activation_max, std::max(output_activation_min, raw_output));
    output_data[i] = clamped_output;
  }
}

// TODO(jiawen): We can implement BroadcastAdd on buffers of arbitrary
// dimensionality if the runtime code does a single loop over one dimension
// that handles broadcasting as the base case. The code generator would then
// generate max(D1, D2) nested for loops.
// TODO(benoitjacob): BroadcastAdd is intentionally duplicated from
// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
// reference_ops.h.
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
                               const RuntimeShape& input1_shape,
                               const float* input1_data,
                               const RuntimeShape& input2_shape,
                               const float* input2_data,
                               const RuntimeShape& output_shape,
                               float* output_data) {
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                      &desc2);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest stride,
  // typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for the
  // best cache behavior.
  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          output_data[Offset(extended_output_shape, b, y, x, c)] =
              ActivationFunctionWithMinMax(
                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
                  params.float_activation_min, params.float_activation_max);
        }
      }
    }
  }
}

inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
                               const RuntimeShape& input1_shape,
                               const int32* input1_data,
                               const RuntimeShape& input2_shape,
                               const int32* input2_data,
                               const RuntimeShape& output_shape,
                               int32* output_data) {
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                      &desc2);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest stride,
  // typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for the
  // best cache behavior.
  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          output_data[Offset(extended_output_shape, b, y, x, c)] =
              ActivationFunctionWithMinMax(
                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
                  params.quantized_activation_min,
                  params.quantized_activation_max);
        }
      }
    }
  }
}

inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
                               const RuntimeShape& input1_shape,
                               const uint8* input1_data,
                               const RuntimeShape& input2_shape,
                               const uint8* input2_data,
                               const RuntimeShape& output_shape,
                               uint8* output_data) {
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                      &desc2);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest stride,
  // typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for the
  // best cache behavior.
  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          const int32 input1_val =
              params.input1_offset +
              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
          const int32 input2_val =
              params.input2_offset +
              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
          const int32 shifted_input1_val =
              input1_val * (1 << params.left_shift);
          const int32 shifted_input2_val =
              input2_val * (1 << params.left_shift);
          const int32 scaled_input1_val =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  shifted_input1_val, params.input1_multiplier,
                  params.input1_shift);
          const int32 scaled_input2_val =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  shifted_input2_val, params.input2_multiplier,
                  params.input2_shift);
          const int32 raw_sum = scaled_input1_val + scaled_input2_val;
          const int32 raw_output =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  raw_sum, params.output_multiplier, params.output_shift) +
              params.output_offset;
          const int32 clamped_output =
              std::min(params.quantized_activation_max,
                       std::max(params.quantized_activation_min, raw_output));
          output_data[Offset(extended_output_shape, b, y, x, c)] =
              static_cast<uint8>(clamped_output);
        }
      }
    }
  }
}

inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
                                 const RuntimeShape& unswitched_input1_shape,
                                 const uint8* unswitched_input1_data,
                                 const RuntimeShape& unswitched_input2_shape,
                                 const uint8* unswitched_input2_data,
                                 const RuntimeShape& output_shape,
                                 uint8* output_data) {
  ArithmeticParams switched_params = unswitched_params;
  switched_params.input1_offset = unswitched_params.input2_offset;
  switched_params.input1_multiplier = unswitched_params.input2_multiplier;
  switched_params.input1_shift = unswitched_params.input2_shift;
  switched_params.input2_offset = unswitched_params.input1_offset;
  switched_params.input2_multiplier = unswitched_params.input1_multiplier;
  switched_params.input2_shift = unswitched_params.input1_shift;

  const bool use_unswitched =
      unswitched_params.broadcast_category ==
      tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;

  const ArithmeticParams& params =
      use_unswitched ? unswitched_params : switched_params;
  const uint8* input1_data =
      use_unswitched ? unswitched_input1_data : unswitched_input2_data;
  const uint8* input2_data =
      use_unswitched ? unswitched_input2_data : unswitched_input1_data;

  // Fivefold nested loops. The second input resets its position for each
  // iteration of the second loop. The first input resets its position at the
  // beginning of the fourth loop. The innermost loop is an elementwise add of
  // sections of the arrays.
  uint8* output_data_ptr = output_data;
  const uint8* input1_data_ptr = input1_data;
  const uint8* input2_data_reset = input2_data;
  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
  // between input shapes. y3 for input 1 is always broadcast, and so the
  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
  // Put another way,
  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
  int y0 = params.broadcast_shape[0];
  int y1 = params.broadcast_shape[1];
  int y2 = params.broadcast_shape[2];
  int y3 = params.broadcast_shape[3];
  int y4 = params.broadcast_shape[4];
  if (y4 > 1) {
    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
    // dimension.
    for (int i0 = 0; i0 < y0; ++i0) {
      const uint8* input2_data_ptr;
      for (int i1 = 0; i1 < y1; ++i1) {
        input2_data_ptr = input2_data_reset;
        for (int i2 = 0; i2 < y2; ++i2) {
          for (int i3 = 0; i3 < y3; ++i3) {
            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
                           output_data_ptr);
            input2_data_ptr += y4;
            output_data_ptr += y4;
          }
          // We have broadcast y4 of input1 data y3 times, and now move on.
          input1_data_ptr += y4;
        }
      }
      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
      input2_data_reset = input2_data_ptr;
    }
  } else {
    // Special case of y4 == 1, in which the innermost loop is a single element
    // and can be combined with the next (y3) as an inner broadcast.
    //
    // Note that this handles the case of pure scalar broadcast when
    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
    // broadcast with batch (as y2 > 1).
    //
    // NOTE The process is the same as the above general case except simplified
    // for y4 == 1 and the loop over y3 is contained within the
    // AddScalarBroadcast function.
    for (int i0 = 0; i0 < y0; ++i0) {
      const uint8* input2_data_ptr;
      for (int i1 = 0; i1 < y1; ++i1) {
        input2_data_ptr = input2_data_reset;
        for (int i2 = 0; i2 < y2; ++i2) {
          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
                             output_data_ptr);
          input2_data_ptr += y3;
          output_data_ptr += y3;
          input1_data_ptr += 1;
        }
      }
      input2_data_reset = input2_data_ptr;
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
@@ -0,0 +1,68 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_

#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

template <typename T1, typename T2, typename T3, typename Cmp>
void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
               const T3* input2_data, const RuntimeShape& output_shape,
               T2* output_data, const Cmp& cmp) {
  TFLITE_DCHECK_GT(input1_shape.DimensionsCount(), 0);
  TFLITE_DCHECK_EQ(input1_shape.DimensionsCount() - 1,
                   output_shape.DimensionsCount());
  int axis = input2_data[0];
  if (axis < 0) {
    axis += input1_shape.DimensionsCount();
  }
  const int axis_size = input1_shape.Dims(axis);

  int outer_size = 1;
  for (int i = 0; i < axis; ++i) {
    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i));
    outer_size *= input1_shape.Dims(i);
  }

  int inner_size = 1;
  const int dims_count = input1_shape.DimensionsCount();
  for (int i = axis + 1; i < dims_count; ++i) {
    TFLITE_DCHECK_EQ(input1_shape.Dims(i), output_shape.Dims(i - 1));
    inner_size *= input1_shape.Dims(i);
  }
  for (int outer = 0; outer < outer_size; ++outer) {
    for (int inner = 0; inner < inner_size; ++inner) {
      auto min_max_value = input1_data[outer * axis_size * inner_size + inner];
      T2 min_max_index = 0;
      for (int i = 1; i < axis_size; ++i) {
        const auto& curr_value =
            input1_data[(outer * axis_size + i) * inner_size + inner];
        if (cmp(curr_value, min_max_value)) {
          min_max_value = curr_value;
          min_max_index = static_cast<T2>(i);
        }
      }
      output_data[outer * inner_size + inner] = min_max_index;
    }
  }
}
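
// --- Illustrative sketch (editorial addition, not upstream TFLM code).
// Arg-max along the last axis of an assumed 2x3 tensor, using a plain
// functor as the comparator; the shapes, values and guard macro are all
// illustrative assumptions only.
#ifdef REFERENCE_ARG_MIN_MAX_USAGE_SKETCH
struct ArgMaxGreater {
  bool operator()(float a, float b) const { return a > b; }
};

inline void ExampleArgMax() {
  const RuntimeShape input_shape({2, 3});
  const RuntimeShape output_shape({2});
  const float input[] = {0.1f, 0.9f, 0.3f,   // Row 0: maximum at index 1.
                         2.0f, 0.5f, 7.0f};  // Row 1: maximum at index 2.
  const int axis[] = {1};                    // Reduce over the last axis.
  int output[2] = {};
  ArgMinMax(input_shape, input, axis, output_shape, output, ArgMaxGreater());
  // output is now {1, 2}.
}
#endif  // REFERENCE_ARG_MIN_MAX_USAGE_SKETCH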
}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
@@ -0,0 +1,84 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

// TODO(ycling): Refactoring. Remove BroadcastLogical and use the more
// generalized and efficient BroadcastBinaryFunction.
//
// Also appears to duplicate MinimumMaximum.
//
// R: Result type. T1: Input 1 type. T2: Input 2 type.
template <typename R, typename T1, typename T2>
inline void BroadcastBinaryFunction4DSlow(
    const RuntimeShape& unextended_input1_shape, const T1* input1_data,
    const RuntimeShape& unextended_input2_shape, const T2* input2_data,
    const RuntimeShape& unextended_output_shape, R* output_data,
    R (*func)(T1, T2)) {
  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
                                      unextended_input2_shape, &desc1, &desc2);

  for (int b = 0; b < output_shape.Dims(0); ++b) {
    for (int y = 0; y < output_shape.Dims(1); ++y) {
      for (int x = 0; x < output_shape.Dims(2); ++x) {
        for (int c = 0; c < output_shape.Dims(3); ++c) {
          auto out_idx = Offset(output_shape, b, y, x, c);
          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
          auto in1_val = input1_data[in1_idx];
          auto in2_val = input2_data[in2_idx];
          output_data[out_idx] = func(in1_val, in2_val);
        }
      }
    }
  }
}

// R: Result type. T1: Input 1 type. T2: Input 2 type.
// TODO(renjieliu): Refactor other binary functions to use this one.
template <typename R, typename T1, typename T2>
inline void BinaryFunction(const RuntimeShape& input1_shape,
                           const T1* input1_data,
                           const RuntimeShape& input2_shape,
                           const T2* input2_data,
                           const RuntimeShape& output_shape, R* output_data,
                           R (*func)(T1, T2)) {
  const int flat_size =
      MatchingFlatSize(input1_shape, input2_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    output_data[i] = func(input1_data[i], input2_data[i]);
  }
}
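
// --- Illustrative sketch (editorial addition, not upstream TFLM code).
// Applies BinaryFunction() with an ordinary function pointer to compute an
// element-wise "greater than" mask; shapes, values and the guard macro are
// illustrative assumptions only.
#ifdef REFERENCE_BINARY_FUNCTION_USAGE_SKETCH
inline bool ExampleGreater(float lhs, float rhs) { return lhs > rhs; }

inline void ExampleBinaryFunction() {
  const RuntimeShape shape({1, 1, 1, 3});
  const float input1[] = {1.f, 5.f, 2.f};
  const float input2[] = {3.f, 4.f, 2.f};
  bool output[3] = {};
  BinaryFunction(shape, input1, shape, input2, shape, output, ExampleGreater);
  // output is now {false, true, false}.
}
#endif  // REFERENCE_BINARY_FUNCTION_USAGE_SKETCH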

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
@@ -0,0 +1,37 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_

#include <cmath>

#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

inline void Ceil(const RuntimeShape& input_shape, const float* input_data,
                 const RuntimeShape& output_shape, float* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; ++i) {
    output_data[i] = std::ceil(input_data[i]);
  }
}

}  // namespace reference_ops
}  // namespace tflite
#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CEIL_H_
@@ -0,0 +1,334 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/string_util.h"

namespace tflite {

namespace reference_ops {

template <typename T>
inline bool EqualFn(T lhs, T rhs) {
  return lhs == rhs;
}

template <typename T>
inline bool NotEqualFn(T lhs, T rhs) {
  return lhs != rhs;
}

template <typename T>
inline bool GreaterFn(T lhs, T rhs) {
  return lhs > rhs;
}
template <typename T>
inline bool GreaterEqualFn(T lhs, T rhs) {
  return lhs >= rhs;
}
template <typename T>
inline bool LessFn(T lhs, T rhs) {
  return lhs < rhs;
}
template <typename T>
inline bool LessEqualFn(T lhs, T rhs) {
  return lhs <= rhs;
}

inline bool StringRefEqualFn(const StringRef& lhs, const StringRef& rhs) {
  if (lhs.len != rhs.len) return false;
  for (int i = 0; i < lhs.len; ++i) {
    if (lhs.str[i] != rhs.str[i]) return false;
  }
  return true;
}

inline bool StringRefNotEqualFn(const StringRef& lhs, const StringRef& rhs) {
  return !StringRefEqualFn(lhs, rhs);
}

template <typename T>
using ComparisonFn = bool (*)(T, T);

template <typename T, ComparisonFn<T> F>
inline void ComparisonImpl(
    const ComparisonParams& op_params, const RuntimeShape& input1_shape,
    const T* input1_data, const RuntimeShape& input2_shape,
    const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
  const int64_t flatsize =
      MatchingFlatSize(input1_shape, input2_shape, output_shape);
  for (int64_t i = 0; i < flatsize; ++i) {
    output_data[i] = F(input1_data[i], input2_data[i]);
  }
}

template <bool (*F)(const StringRef&, const StringRef&)>
inline void ComparisonStringImpl(const RuntimeShape& input1_shape,
                                 const TfLiteTensor* input1,
                                 const RuntimeShape& input2_shape,
                                 const TfLiteTensor* input2,
                                 const RuntimeShape& output_shape,
                                 bool* output_data) {
  const int64_t flatsize =
      MatchingFlatSize(input1_shape, input2_shape, output_shape);
  for (int64_t i = 0; i < flatsize; ++i) {
    const auto lhs = GetString(input1, i);
    const auto rhs = GetString(input2, i);
    output_data[i] = F(lhs, rhs);
  }
}

template <ComparisonFn<float> F>
inline void Comparison(const ComparisonParams& op_params,
                       const RuntimeShape& input1_shape,
                       const float* input1_data,
                       const RuntimeShape& input2_shape,
                       const float* input2_data,
                       const RuntimeShape& output_shape, bool* output_data) {
  ComparisonImpl<float, F>(op_params, input1_shape, input1_data, input2_shape,
                           input2_data, output_shape, output_data);
}
|
||||
|
||||
template <typename T, ComparisonFn<int32> F>
|
||||
inline void ComparisonWithScaling(
|
||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
|
||||
const T* input1_data, const RuntimeShape& input2_shape,
|
||||
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
|
||||
int left_shift = op_params.left_shift;
|
||||
int32 input1_offset = op_params.input1_offset;
|
||||
int32 input1_multiplier = op_params.input1_multiplier;
|
||||
int input1_shift = op_params.input1_shift;
|
||||
int32 input2_offset = op_params.input2_offset;
|
||||
int32 input2_multiplier = op_params.input2_multiplier;
|
||||
int input2_shift = op_params.input2_shift;
|
||||
|
||||
const int64_t flatsize =
|
||||
MatchingFlatSize(input1_shape, input2_shape, output_shape);
|
||||
for (int64_t i = 0; i < flatsize; ++i) {
|
||||
const int32 input1_val = input1_offset + input1_data[i];
|
||||
const int32 input2_val = input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, input1_multiplier, input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, input2_multiplier, input2_shift);
|
||||
output_data[i] = F(scaled_input1_val, scaled_input2_val);
|
||||
}
|
||||
}
|
||||
|
||||
struct BroadcastComparison4DSlowCommon {
|
||||
const RuntimeShape output_shape;
|
||||
NdArrayDesc<4> desc1;
|
||||
NdArrayDesc<4> desc2;
|
||||
};
|
||||
|
||||
inline BroadcastComparison4DSlowCommon BroadcastComparison4DSlowPreprocess(
|
||||
const RuntimeShape& unextended_input1_shape,
|
||||
const RuntimeShape& unextended_input2_shape,
|
||||
const RuntimeShape& unextended_output_shape) {
|
||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
||||
NdArrayDesc<4> desc1;
|
||||
NdArrayDesc<4> desc2;
|
||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
||||
unextended_input2_shape, &desc1, &desc2);
|
||||
return {RuntimeShape::ExtendedShape(4, unextended_output_shape), desc1,
|
||||
desc2};
|
||||
}
|
||||
|
||||
template <typename T, ComparisonFn<T> F>
|
||||
inline void BroadcastComparison4DSlowImpl(
|
||||
const ComparisonParams& op_params,
|
||||
const RuntimeShape& unextended_input1_shape, const T* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape, const T* input2_data,
|
||||
const RuntimeShape& unextended_output_shape, bool* output_data) {
|
||||
const BroadcastComparison4DSlowCommon dims =
|
||||
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
|
||||
unextended_input2_shape,
|
||||
unextended_output_shape);
|
||||
|
||||
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
|
||||
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
|
||||
output_data[Offset(dims.output_shape, b, y, x, c)] =
|
||||
F(input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)],
|
||||
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool (*F)(const StringRef&, const StringRef&)>
|
||||
inline void BroadcastComparison4DSlowStringImpl(
|
||||
const RuntimeShape& unextended_input1_shape, const TfLiteTensor* input1,
|
||||
const RuntimeShape& unextended_input2_shape, const TfLiteTensor* input2,
|
||||
const RuntimeShape& unextended_output_shape, bool* output_data) {
|
||||
const BroadcastComparison4DSlowCommon dims =
|
||||
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
|
||||
unextended_input2_shape,
|
||||
unextended_output_shape);
|
||||
|
||||
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
|
||||
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
|
||||
const auto lhs =
|
||||
GetString(input1, SubscriptToIndex(dims.desc1, b, y, x, c));
|
||||
const auto rhs =
|
||||
GetString(input2, SubscriptToIndex(dims.desc2, b, y, x, c));
|
||||
output_data[Offset(dims.output_shape, b, y, x, c)] = F(lhs, rhs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <ComparisonFn<float> F>
|
||||
inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const float* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const float* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
bool* output_data) {
|
||||
BroadcastComparison4DSlowImpl<float, F>(op_params, input1_shape, input1_data,
|
||||
input2_shape, input2_data,
|
||||
output_shape, output_data);
|
||||
}
|
||||
|
||||
template <typename T, ComparisonFn<int32> F>
|
||||
inline void BroadcastComparison4DSlowWithScaling(
|
||||
const ComparisonParams& op_params,
|
||||
const RuntimeShape& unextended_input1_shape, const T* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape, const T* input2_data,
|
||||
const RuntimeShape& unextended_output_shape, bool* output_data) {
|
||||
const BroadcastComparison4DSlowCommon dims =
|
||||
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
|
||||
unextended_input2_shape,
|
||||
unextended_output_shape);
|
||||
|
||||
int left_shift = op_params.left_shift;
|
||||
int32 input1_offset = op_params.input1_offset;
|
||||
int32 input1_multiplier = op_params.input1_multiplier;
|
||||
int input1_shift = op_params.input1_shift;
|
||||
int32 input2_offset = op_params.input2_offset;
|
||||
int32 input2_multiplier = op_params.input2_multiplier;
|
||||
int input2_shift = op_params.input2_shift;
|
||||
|
||||
for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
|
||||
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
|
||||
const int32 input1_val =
|
||||
input1_offset +
|
||||
input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
|
||||
const int32 input2_val =
|
||||
input2_offset +
|
||||
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
|
||||
const int32 shifted_input1_val = input1_val * (1 << left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, input1_multiplier, input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, input2_multiplier, input2_shift);
|
||||
output_data[Offset(dims.output_shape, b, y, x, c)] =
|
||||
F(scaled_input1_val, scaled_input2_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define TFLITE_COMPARISON_OP(name) \
|
||||
inline void name(const ComparisonParams& op_params, \
|
||||
const RuntimeShape& input1_shape, const float* input1_data, \
|
||||
const RuntimeShape& input2_shape, const float* input2_data, \
|
||||
const RuntimeShape& output_shape, bool* output_data) { \
|
||||
Comparison<name##Fn>(op_params, input1_shape, input1_data, input2_shape, \
|
||||
input2_data, output_shape, output_data); \
|
||||
} \
|
||||
template <typename T> \
|
||||
inline void name##NoScaling( \
|
||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
||||
const T* input1_data, const RuntimeShape& input2_shape, \
|
||||
const T* input2_data, const RuntimeShape& output_shape, \
|
||||
bool* output_data) { \
|
||||
ComparisonImpl<T, name##Fn>(op_params, input1_shape, input1_data, \
|
||||
input2_shape, input2_data, output_shape, \
|
||||
output_data); \
|
||||
} \
|
||||
template <typename T> \
|
||||
inline void name##WithScaling( \
|
||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
||||
const T* input1_data, const RuntimeShape& input2_shape, \
|
||||
const T* input2_data, const RuntimeShape& output_shape, \
|
||||
bool* output_data) { \
|
||||
ComparisonWithScaling<T, name##Fn>(op_params, input1_shape, input1_data, \
|
||||
input2_shape, input2_data, \
|
||||
output_shape, output_data); \
|
||||
} \
|
||||
template <typename T> \
|
||||
inline void Broadcast4DSlow##name##NoScaling( \
|
||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
||||
const T* input1_data, const RuntimeShape& input2_shape, \
|
||||
const T* input2_data, const RuntimeShape& output_shape, \
|
||||
bool* output_data) { \
|
||||
BroadcastComparison4DSlowImpl<T, name##Fn>( \
|
||||
op_params, input1_shape, input1_data, input2_shape, input2_data, \
|
||||
output_shape, output_data); \
|
||||
} \
|
||||
inline void Broadcast4DSlow##name( \
|
||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
||||
const float* input1_data, const RuntimeShape& input2_shape, \
|
||||
const float* input2_data, const RuntimeShape& output_shape, \
|
||||
bool* output_data) { \
|
||||
BroadcastComparison4DSlow<name##Fn>(op_params, input1_shape, input1_data, \
|
||||
input2_shape, input2_data, \
|
||||
output_shape, output_data); \
|
||||
} \
|
||||
template <typename T> \
|
||||
inline void Broadcast4DSlow##name##WithScaling( \
|
||||
const ComparisonParams& op_params, const RuntimeShape& input1_shape, \
|
||||
const T* input1_data, const RuntimeShape& input2_shape, \
|
||||
const T* input2_data, const RuntimeShape& output_shape, \
|
||||
bool* output_data) { \
|
||||
BroadcastComparison4DSlowWithScaling<T, name##Fn>( \
|
||||
op_params, input1_shape, input1_data, input2_shape, input2_data, \
|
||||
output_shape, output_data); \
|
||||
}
|
||||
TFLITE_COMPARISON_OP(Equal);
|
||||
TFLITE_COMPARISON_OP(NotEqual);
|
||||
TFLITE_COMPARISON_OP(Greater);
|
||||
TFLITE_COMPARISON_OP(GreaterEqual);
|
||||
TFLITE_COMPARISON_OP(Less);
|
||||
TFLITE_COMPARISON_OP(LessEqual);
|
||||
#undef TFLITE_COMPARISON_OP
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
|
||||
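// --- Editor's note, not part of the upstream file: TFLITE_COMPARISON_OP above
// stamps out the public entry points, e.g. Greater(), GreaterNoScaling<T>(),
// GreaterWithScaling<T>() and Broadcast4DSlowGreater(). A hedged sketch for
// same-shape float inputs (the float path ignores the quantization fields in
// ComparisonParams):
//
//   tflite::ComparisonParams params;
//   const tflite::RuntimeShape shape({1, 1, 1, 4});
//   const float a[4] = {1, 2, 3, 4}, b[4] = {4, 3, 2, 1};
//   bool out[4];
//   tflite::reference_ops::Greater(params, shape, a, shape, b, shape, out);
//   // out == {false, false, true, true}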
@@ -0,0 +1,140 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

template <typename Scalar>
inline void Concatenation(const ConcatenationParams& params,
                          const RuntimeShape* const* input_shapes,
                          const Scalar* const* input_data,
                          const RuntimeShape& output_shape,
                          Scalar* output_data) {
  int axis = params.axis;
  int inputs_count = params.inputs_count;
  const int concat_dimensions = output_shape.DimensionsCount();
  TFLITE_DCHECK_LT(axis, concat_dimensions);

  int64_t concat_size = 0;
  for (int i = 0; i < inputs_count; i++) {
    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
    for (int j = 0; j < concat_dimensions; j++) {
      if (j != axis) {
        MatchingDim(*input_shapes[i], j, output_shape, j);
      }
    }
    concat_size += input_shapes[i]->Dims(axis);
  }
  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
  int64_t outer_size = 1;
  for (int i = 0; i < axis; ++i) {
    outer_size *= output_shape.Dims(i);
  }
  // For all input arrays,
  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
  int64_t base_inner_size = 1;
  for (int i = axis + 1; i < concat_dimensions; ++i) {
    base_inner_size *= output_shape.Dims(i);
  }

  Scalar* output_ptr = output_data;
  for (int k = 0; k < outer_size; k++) {
    for (int i = 0; i < inputs_count; ++i) {
      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
      const Scalar* input_ptr = input_data[i] + k * copy_size;
      memcpy(output_ptr, input_ptr, copy_size * sizeof(Scalar));
      output_ptr += copy_size;
    }
  }
}

// TODO(prabhumk): This is the same as the optimized implementation.
// TODO(prabhumk): The quantized implementation of concatentation isn't fully
// quantized as it takes scale as a floating point value. This should be fixed
// when optimizng this routine further.
inline void ConcatenationWithScaling(const ConcatenationParams& params,
                                     const RuntimeShape* const* input_shapes,
                                     const uint8* const* input_data,
                                     const RuntimeShape& output_shape,
                                     uint8* output_data) {
  int axis = params.axis;
  const int32* input_zeropoint = params.input_zeropoint;
  const float* input_scale = params.input_scale;
  int inputs_count = params.inputs_count;
  const int32 output_zeropoint = params.output_zeropoint;
  const float output_scale = params.output_scale;

  const int concat_dimensions = output_shape.DimensionsCount();
  TFLITE_DCHECK_LT(axis, concat_dimensions);

  int64_t concat_size = 0;
  for (int i = 0; i < inputs_count; i++) {
    TFLITE_DCHECK_EQ(input_shapes[i]->DimensionsCount(), concat_dimensions);
    for (int j = 0; j < concat_dimensions; j++) {
      if (j != axis) {
        MatchingDim(*input_shapes[i], j, output_shape, j);
      }
    }
    concat_size += input_shapes[i]->Dims(axis);
  }
  TFLITE_DCHECK_EQ(concat_size, output_shape.Dims(axis));
  int64_t outer_size = 1;
  for (int i = 0; i < axis; ++i) {
    outer_size *= output_shape.Dims(i);
  }
  // For all input arrays,
  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
  int64_t base_inner_size = 1;
  for (int i = axis + 1; i < concat_dimensions; ++i) {
    base_inner_size *= output_shape.Dims(i);
  }

  const float inverse_output_scale = 1.f / output_scale;
  uint8* output_ptr = output_data;
  for (int k = 0; k < outer_size; k++) {
    for (int i = 0; i < inputs_count; ++i) {
      const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
      const uint8* input_ptr = input_data[i] + k * copy_size;
      if (input_zeropoint[i] == output_zeropoint &&
          input_scale[i] == output_scale) {
        memcpy(output_ptr, input_ptr, copy_size);
      } else {
        const float scale = input_scale[i] * inverse_output_scale;
        const float bias = -input_zeropoint[i] * scale;
        for (int j = 0; j < copy_size; ++j) {
          const int32_t value = static_cast<int32_t>(tflite::TfLiteRound(
                                    input_ptr[j] * scale + bias)) +
                                output_zeropoint;
          output_ptr[j] = static_cast<uint8_t>(
              std::max<int32_t>(std::min<int32_t>(255, value), 0));
        }
      }
      output_ptr += copy_size;
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
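// --- Editor's note, not part of the upstream file: a hypothetical sketch of
// driving Concatenation(). Inputs are joined along params.axis and every other
// dimension must already match the output shape. Two 1x2 float tensors joined
// along axis 1:
//
//   tflite::ConcatenationParams params;
//   params.axis = 1;
//   params.inputs_count = 2;
//   const tflite::RuntimeShape in_shape({1, 2}), out_shape({1, 4});
//   const tflite::RuntimeShape* shapes[2] = {&in_shape, &in_shape};
//   const float a[2] = {1, 2}, b[2] = {3, 4};
//   const float* inputs[2] = {a, b};
//   float out[4];
//   tflite::reference_ops::Concatenation(params, shapes, inputs, out_shape, out);
//   // out == {1, 2, 3, 4}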
@@ -0,0 +1,262 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_

#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {

namespace reference_ops {

inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                 const float* input_data, const RuntimeShape& filter_shape,
                 const float* filter_data, const RuntimeShape& bias_shape,
                 const float* bias_data, const RuntimeShape& output_shape,
                 float* output_data, const RuntimeShape& im2col_shape,
                 float* im2col_data) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          float total = 0.f;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  float input_value = input_data[Offset(
                      input_shape, batch, in_y, in_x, in_channel)];
                  float filter_value =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  total += (input_value * filter_value);
                }
              }
            }
          }
          float bias_value = 0.0f;
          if (bias_data) {
            bias_value = bias_data[out_channel];
          }
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              ActivationFunctionWithMinMax(total + bias_value,
                                           output_activation_min,
                                           output_activation_max);
        }
      }
    }
  }
}

inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                 const uint8* input_data, const RuntimeShape& filter_shape,
                 const uint8* filter_data, const RuntimeShape& bias_shape,
                 const int32* bias_data, const RuntimeShape& output_shape,
                 uint8* output_data, const RuntimeShape& im2col_shape,
                 uint8* im2col_data, void* cpu_backend_context) {
  (void)cpu_backend_context;  // only used in optimized code.
  (void)im2col_data;          // only used in optimized code.
  (void)im2col_shape;         // only used in optimized code.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int32 input_offset = params.input_offset;
  const int32 filter_offset = params.weights_offset;
  const int32 output_offset = params.output_offset;
  const int32 output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          int32 acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  int32 input_val = input_data[Offset(input_shape, batch, in_y,
                                                      in_x, in_channel)];
                  int32 filter_val =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  acc +=
                      (filter_val + filter_offset) * (input_val + input_offset);
                }
              }
            }
          }
          if (bias_data) {
            acc += bias_data[out_channel];
          }
          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
                                              output_shift);
          acc += output_offset;
          acc = std::max(acc, output_activation_min);
          acc = std::min(acc, output_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              static_cast<uint8>(acc);
        }
      }
    }
  }
}

inline void HybridConvPerChannel(
    const ConvParams& params, float* scaling_factors_ptr,
    const RuntimeShape& input_shape, const int8_t* input_data,
    const RuntimeShape& filter_shape, const int8_t* filter_data,
    const RuntimeShape& bias_shape, const float* bias_data,
    const RuntimeShape& output_shape, float* output_data,
    const RuntimeShape& im2col_shape, int8_t* im2col_data,
    const float* per_channel_scale, int32_t* input_offset) {
  (void)im2col_data;   // only used in optimized code.
  (void)im2col_shape;  // only used in optimized code.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          int32 acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  int32 input_val = input_data[Offset(input_shape, batch, in_y,
                                                      in_x, in_channel)];
                  int32 filter_val =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  acc += filter_val * (input_val - input_offset[batch]);
                }
              }
            }
          }
          float acc_float =
              acc * per_channel_scale[out_channel] * scaling_factors_ptr[batch];
          if (bias_data) {
            acc_float += bias_data[out_channel];
          }
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              ActivationFunctionWithMinMax(acc_float, output_activation_min,
                                           output_activation_max);
        }
      }
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
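// --- Editor's note, not part of the upstream file: all three Conv variants
// above assume NHWC activations and a filter laid out as
// [output_channel, filter_y, filter_x, input_channel]. A hedged sketch of a
// 1x1 float convolution (stride 1, no padding) on a hypothetical 1x2x2x1 input:
//
//   tflite::ConvParams params = {};
//   params.stride_width = params.stride_height = 1;
//   params.dilation_width_factor = params.dilation_height_factor = 1;
//   params.float_activation_min = std::numeric_limits<float>::lowest();
//   params.float_activation_max = std::numeric_limits<float>::max();
//   const tflite::RuntimeShape in_shape({1, 2, 2, 1}), filt_shape({1, 1, 1, 1}),
//       bias_shape({1}), out_shape({1, 2, 2, 1});
//   const float in[4] = {1, 2, 3, 4}, filt[1] = {0.5f}, bias[1] = {1.0f};
//   float out[4];
//   tflite::reference_ops::Conv(params, in_shape, in, filt_shape, filt,
//                               bias_shape, bias, out_shape, out,
//                               tflite::RuntimeShape(), nullptr);
//   // out == {1.5f, 2.0f, 2.5f, 3.0f}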
@@ -0,0 +1,100 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

inline void DepthwiseConv(
    const DepthwiseParams& params, const RuntimeShape& input_shape,
    const float* input_data, const RuntimeShape& filter_shape,
    const float* filter_data, const RuntimeShape& bias_shape,
    const float* bias_data, const RuntimeShape& output_shape,
    float* output_data) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int depth_multiplier = params.depth_multiplier;
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int input_depth = input_shape.Dims(3);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

  for (int b = 0; b < batches; ++b) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int ic = 0; ic < input_depth; ++ic) {
          for (int m = 0; m < depth_multiplier; m++) {
            const int oc = m + ic * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - pad_width;
            const int in_y_origin = (out_y * stride_height) - pad_height;
            float total = 0.f;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // If the location is outside the bounds of the input image,
                // use zero as a default value.
                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height)) {
                  float input_value =
                      input_data[Offset(input_shape, b, in_y, in_x, ic)];
                  float filter_value = filter_data[Offset(
                      filter_shape, 0, filter_y, filter_x, oc)];
                  total += (input_value * filter_value);
                }
              }
            }
            float bias_value = 0.0f;
            if (bias_data) {
              bias_value = bias_data[oc];
            }
            output_data[Offset(output_shape, b, out_y, out_x, oc)] =
                ActivationFunctionWithMinMax(total + bias_value,
                                             output_activation_min,
                                             output_activation_max);
          }
        }
      }
    }
  }
}

}  // end namespace reference_ops
}  // end namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_FLOAT_H_
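// --- Editor's note, not part of the upstream file: a worked example of the
// channel bookkeeping in the kernel above. Each input channel ic produces
// depth_multiplier outputs, mapped as oc = m + ic * depth_multiplier, and the
// filter is indexed as [1, filter_y, filter_x, oc]. With input_depth = 2 and
// depth_multiplier = 3 the six output channels are ordered
// (ic0,m0) (ic0,m1) (ic0,m2) (ic1,m0) (ic1,m1) (ic1,m2), which is why the
// DCHECK requires output_depth == input_depth * depth_multiplier.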
@@ -0,0 +1,297 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_

#include <algorithm>

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

// Used in tests and template parameters to control which version of depthwise
// convolution is called. Primarily for reference code, and specializations
// forced in tests.
enum class DepthwiseConvImplementation {
  // Run all tests against kUseStandardEntry even if also testing another
  // kernel, since we need to be sure that the main DepthwiseConv() function in
  // optimized_ops.h dispatches to a correctly-executing kernel.
  kNone = 0,                 // The "default" option: use the normal
                             // DepthwiseConv kernel (entry) function.
  kUseGenericKernel,         // Forced use of generic kernel.
  kUseNeon3x3,               // 3x3 kernel that uses NEON when available.
  kUseNeon3x3DotProduct,     // 3x3 kernel that uses dot-product enabled NEON
                             // when available.
  kUseCModel3x3DotProduct,   // 3x3 kernel, reference C model that is intended
                             // to match overall design NEON code.
  kUseUnwound3x3DotProduct,  // 3x3 kernel, reference C model with unwound loops
                             // and some arrays.
  kUseIntrinsics3x3DotProduct,  // 3x3 kernel using NEON intrinsics.
};

// Category of depthwise convolution output rounding.
enum class DepthwiseConvOutputRounding {
  kNone = 0,      // Invalid: specific method must be specified.
  kAwayFromZero,  // Original method: exact halves rounded away from zero.
  kUpward,        // Halves towards +infinity: adds 0.5 before truncate.
  // This is where a future kNearestEven would be placed.
};

// Category of depthwise convolution depth multiplication.
enum class DepthwiseConvDepthMultiplication {
  kNoMultiplication = 0,  // Depth multiplier = 1.
  kUnitInputDepth,        // Input depth = 1, output depth = depth multiplier.
};

namespace reference_ops {
namespace depthwise_conv {

template <DepthwiseConvOutputRounding output_rounding>
inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier,
                                int shift) {
  TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
    int32 x, int32 quantized_multiplier, int shift) {
  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
    int32 x, int32 quantized_multiplier, int shift) {
  using gemmlowp::SaturatingRoundingDoublingHighMul;
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  const int rounding_offset = right_shift > 0 ? 1 << (right_shift - 1) : 0;
  return (SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
                                            quantized_multiplier) +
          rounding_offset) >>
         right_shift;
}

template <DepthwiseConvOutputRounding output_rounding>
struct DepthwiseConvBasicKernel {
  static inline void Run(const DepthwiseParams& params,
                         const RuntimeShape& input_shape,
                         const uint8* input_data,
                         const RuntimeShape& filter_shape,
                         const uint8* filter_data,
                         const RuntimeShape& bias_shape, const int32* bias_data,
                         const RuntimeShape& output_shape, uint8* output_data) {
    const int stride_width = params.stride_width;
    const int stride_height = params.stride_height;
    const int dilation_width_factor = params.dilation_width_factor;
    const int dilation_height_factor = params.dilation_height_factor;
    const int pad_width = params.padding_values.width;
    const int pad_height = params.padding_values.height;
    const int depth_multiplier = params.depth_multiplier;
    const int32 output_activation_min = params.quantized_activation_min;
    const int32 output_activation_max = params.quantized_activation_max;
    const int32 input_offset = params.input_offset;
    const int32 filter_offset = params.weights_offset;
    const int32 output_offset = params.output_offset;
    const int32 output_multiplier = params.output_multiplier;
    const int output_shift = params.output_shift;
    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
    const int input_height = input_shape.Dims(1);
    const int input_width = input_shape.Dims(2);
    const int input_depth = input_shape.Dims(3);
    const int filter_height = filter_shape.Dims(1);
    const int filter_width = filter_shape.Dims(2);
    const int output_height = output_shape.Dims(1);
    const int output_width = output_shape.Dims(2);
    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

    for (int b = 0; b < batches; ++b) {
      for (int out_y = 0; out_y < output_height; ++out_y) {
        for (int out_x = 0; out_x < output_width; ++out_x) {
          for (int ic = 0; ic < input_depth; ++ic) {
            for (int m = 0; m < depth_multiplier; m++) {
              const int oc = m + ic * depth_multiplier;
              const int in_x_origin = (out_x * stride_width) - pad_width;
              const int in_y_origin = (out_y * stride_height) - pad_height;
              int32 acc = 0;
              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                  const int in_x =
                      in_x_origin + dilation_width_factor * filter_x;
                  const int in_y =
                      in_y_origin + dilation_height_factor * filter_y;
                  // If the location is outside the bounds of the input image,
                  // use zero as a default value.
                  if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                      (in_y < input_height)) {
                    int32 input_val =
                        input_data[Offset(input_shape, b, in_y, in_x, ic)];
                    int32 filter_val = filter_data[Offset(
                        filter_shape, 0, filter_y, filter_x, oc)];
                    acc += (filter_val + filter_offset) *
                           (input_val + input_offset);
                  }
                }
              }
              if (bias_data) {
                acc += bias_data[oc];
              }
              acc = DepthwiseConvRound<output_rounding>(acc, output_multiplier,
                                                        output_shift);
              acc += output_offset;
              acc = std::max(acc, output_activation_min);
              acc = std::min(acc, output_activation_max);
              output_data[Offset(output_shape, b, out_y, out_x, oc)] =
                  static_cast<uint8>(acc);
            }
          }
        }
      }
    }
  }

  // TODO(b/148596273): Reconcile reference versions, perhaps with common
  // MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
  static inline void RunPerChannel(
      const DepthwiseParams& params, const RuntimeShape& input_shape,
      const int8* input_data, const RuntimeShape& filter_shape,
      const int8* filter_data, const RuntimeShape& bias_shape,
      const int32* bias_data, const RuntimeShape& output_shape,
      int8* output_data) {
    // Get parameters.
    // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
    const int stride_width = params.stride_width;
    const int stride_height = params.stride_height;
    const int dilation_width_factor = params.dilation_width_factor;
    const int dilation_height_factor = params.dilation_height_factor;
    const int pad_width = params.padding_values.width;
    const int pad_height = params.padding_values.height;
    const int depth_multiplier = params.depth_multiplier;
    const int32 input_offset = params.input_offset;
    const int32 output_offset = params.output_offset;
    const int32 output_activation_min = params.quantized_activation_min;
    const int32 output_activation_max = params.quantized_activation_max;
    const int32* output_multiplier = params.output_multiplier_per_channel;
    const int32* output_shift = params.output_shift_per_channel;

    // Check dimensions of the tensors.
    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

    TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
    const int batches = MatchingDim(input_shape, 0, output_shape, 0);
    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
    const int input_height = input_shape.Dims(1);
    const int input_width = input_shape.Dims(2);
    const int input_depth = input_shape.Dims(3);
    const int filter_height = filter_shape.Dims(1);
    const int filter_width = filter_shape.Dims(2);
    const int output_height = output_shape.Dims(1);
    const int output_width = output_shape.Dims(2);
    TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

    for (int batch = 0; batch < batches; ++batch) {
      for (int out_y = 0; out_y < output_height; ++out_y) {
        for (int out_x = 0; out_x < output_width; ++out_x) {
          for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
            for (int m = 0; m < depth_multiplier; ++m) {
              const int output_channel = m + in_channel * depth_multiplier;
              const int in_x_origin = (out_x * stride_width) - pad_width;
              const int in_y_origin = (out_y * stride_height) - pad_height;
              int32 acc = 0;
              for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
                for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                  const int in_x =
                      in_x_origin + dilation_width_factor * filter_x;
                  const int in_y =
                      in_y_origin + dilation_height_factor * filter_y;
                  // Zero padding by omitting the areas outside the image.
                  const bool is_point_inside_image =
                      (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                      (in_y < input_height);
                  if (is_point_inside_image) {
                    int32 input_val = input_data[Offset(
                        input_shape, batch, in_y, in_x, in_channel)];
                    int32 filter_val = filter_data[Offset(
                        filter_shape, 0, filter_y, filter_x, output_channel)];
                    // Accumulate with 32 bits accumulator.
                    // In the nudging process during model quantization, we
                    // force real value of 0.0 be represented by a quantized
                    // value. This guarantees that the input_offset is a int8,
                    // even though it is represented using int32. int32 += int8
                    // * (int8 - int8) so the highest value we can get from each
                    // accumulation is [-127, 127] * ([-128, 127] -
                    // [-128, 127]), which is [-32512, 32512]. log2(32512)
                    // = 14.98, which means we can accumulate at least 2^16
                    // multiplications without overflow. The accumulator is
                    // applied to a filter so the accumulation logic will hold
                    // as long as the filter size (filter_y * filter_x *
                    // in_channel) does not exceed 2^16, which is the case in
                    // all the models we have seen so far.
                    acc += filter_val * (input_val + input_offset);
                  }
                }
              }
              if (bias_data) {
                acc += bias_data[output_channel];
              }
              acc = DepthwiseConvRound<output_rounding>(
                  acc, output_multiplier[output_channel],
                  output_shift[output_channel]);
              acc += output_offset;
              acc = std::max(acc, output_activation_min);
              acc = std::min(acc, output_activation_max);
              output_data[Offset(output_shape, batch, out_y, out_x,
                                 output_channel)] = static_cast<int8_t>(acc);
            }
          }
        }
      }
    }
  }
};

}  // namespace depthwise_conv

inline void DepthwiseConv(
    const DepthwiseParams& params, const RuntimeShape& input_shape,
    const uint8* input_data, const RuntimeShape& filter_shape,
    const uint8* filter_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    uint8* output_data) {
  return depthwise_conv::DepthwiseConvBasicKernel<
      DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
                                                       input_data, filter_shape,
                                                       filter_data, bias_shape,
                                                       bias_data, output_shape,
                                                       output_data);
}

}  // namespace reference_ops
}  // end namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTHWISECONV_UINT8_H_
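// --- Editor's note, not part of the upstream file: a worked example of the two
// DepthwiseConvRound specializations above, assuming a hypothetical
// quantized_multiplier = 1 << 30 (0.5 in Q31 fixed point) and shift = -1, so
// the overall output scale is 0.25:
//   acc = 10:  the intermediate value is 2.5; both kAwayFromZero and kUpward
//              round it to 3.
//   acc = -10: the intermediate value is -2.5; kAwayFromZero rounds away from
//              zero to -3, while kUpward adds the 0.5 bias before the final
//              arithmetic right shift and yields -2.
// The uint8 DepthwiseConv() wrapper in this header pins the mode to
// kAwayFromZero, matching the original TFLite rounding behaviour.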
@@ -0,0 +1,78 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_

#include <limits.h>

#include <vector>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

// Dequantizes into a float without rounding.
template <typename InputT, typename OutputT>
inline void Dequantize(const tflite::DequantizationParams& op_params,
                       const RuntimeShape& input_shape,
                       const InputT* input_data,
                       const RuntimeShape& output_shape, OutputT* output_data) {
  int32 zero_point = op_params.zero_point;
  const double scale = op_params.scale;
  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; i++) {
    const int32 val = input_data[i];
    const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
    output_data[i] = result;
  }
}

// Dequantizes per-channel quantized tensor to float.
template <typename T>
inline void PerChannelDequantize(
    const tflite::PerChannelDequantizationParams& op_params,
    const RuntimeShape& input_shape, const T* input_data,
    const RuntimeShape& output_shape, float* output_data) {
  // Ensure flat size is same.
  MatchingFlatSize(input_shape, output_shape);

  const int32* zero_point = op_params.zero_point;
  const float* scale = op_params.scale;
  const int32 quantized_dimension = op_params.quantized_dimension;
  const int32 num_dims = input_shape.DimensionsCount();
  const int32* dims_data = input_shape.DimsData();
  std::vector<int> current_dim(num_dims, 0);

  do {
    size_t offset =
        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
                            current_dim.data(), 0, nullptr);
    const int channel = current_dim[quantized_dimension];
    const int32 val = input_data[offset];
    const float result =
        static_cast<float>(scale[channel] * (val - zero_point[channel]));
    output_data[offset] = result;
  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
                     current_dim.data()));
}

}  // namespace reference_ops

}  // namespace tflite
#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEQUANTIZE_H_
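// --- Editor's note, not part of the upstream file: Dequantize() applies
// real_value = scale * (quantized_value - zero_point) element-wise. A
// hypothetical example with uint8 input, scale 0.5 and zero_point 128:
//
//   tflite::DequantizationParams op_params;
//   op_params.scale = 0.5;
//   op_params.zero_point = 128;
//   const tflite::RuntimeShape shape({1, 3});
//   const uint8_t q[3] = {126, 128, 130};
//   float real[3];
//   tflite::reference_ops::Dequantize(op_params, shape, q, shape, real);
//   // real == {-1.0f, 0.0f, 1.0f}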
@@ -0,0 +1,39 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_

#include <cmath>

#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

inline void Floor(const RuntimeShape& input_shape, const float* input_data,
                  const RuntimeShape& output_shape, float* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; i++) {
    int offset = i;
    output_data[offset] = std::floor(input_data[offset]);
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_H_
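// --- Editor's note, not part of the upstream file: Floor() follows the same
// calling convention as Ceil() earlier in this commit. One worked detail worth
// keeping in mind for negative inputs: std::floor(-1.2f) == -2.0f, i.e. it
// rounds toward negative infinity rather than truncating toward zero the way a
// float-to-int cast would.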
@@ -0,0 +1,319 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

inline void FullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const float* input_data, const RuntimeShape& weights_shape,
    const float* weights_data, const RuntimeShape& bias_shape,
    const float* bias_data, const RuntimeShape& output_shape,
    float* output_data) {
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  // TODO(benoitjacob): This really should be:
  //     const int batches = ArraySize(output_dims, 1);
  // but the current --variable_batch hack consists in overwriting the 3rd
  // dimension with the runtime batch size, as we don't keep track for each
  // array of which dimension is the batch dimension in it.
  const int output_dims_count = output_shape.DimensionsCount();
  const int weights_dims_count = weights_shape.DimensionsCount();
  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
                                       output_shape, output_dims_count - 1);
  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      float total = 0.f;
      for (int d = 0; d < accum_depth; ++d) {
        total += input_data[b * accum_depth + d] *
                 weights_data[out_c * accum_depth + d];
      }
      float bias_value = 0.0f;
      if (bias_data) {
        bias_value = bias_data[out_c];
      }
      output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
          total + bias_value, output_activation_min, output_activation_max);
    }
  }
}

inline void FullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const uint8* input_data, const RuntimeShape& filter_shape,
    const uint8* filter_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    uint8* output_data) {
  const int32 input_offset = params.input_offset;
  const int32 filter_offset = params.weights_offset;
  const int32 output_offset = params.output_offset;
  const int32 output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  // TODO(benoitjacob): This really should be:
  //     const int batches = ArraySize(output_dims, 1);
  // but the current --variable_batch hack consists in overwriting the 3rd
  // dimension with the runtime batch size, as we don't keep track for each
  // array of which dimension is the batch dimension in it.
  const int output_dim_count = output_shape.DimensionsCount();
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
                                       output_shape, output_dim_count - 1);
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      int32 acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        int32 input_val = input_data[b * accum_depth + d];
        int32 filter_val = filter_data[out_c * accum_depth + d];
        acc += (filter_val + filter_offset) * (input_val + input_offset);
      }
      if (bias_data) {
        acc += bias_data[out_c];
      }
      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
      acc += output_offset;
      acc = std::max(acc, output_activation_min);
      acc = std::min(acc, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<uint8>(acc);
    }
  }
}

inline void FullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const uint8* input_data, const RuntimeShape& filter_shape,
    const uint8* filter_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    int16* output_data) {
  const int32 input_offset = params.input_offset;
  const int32 filter_offset = params.weights_offset;
  const int32 output_offset = params.output_offset;
  const int32 output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  TFLITE_DCHECK_EQ(output_offset, 0);
  // TODO(benoitjacob): This really should be:
  //     const int batches = ArraySize(output_dims, 1);
  // but the current --variable_batch hack consists in overwriting the 3rd
  // dimension with the runtime batch size, as we don't keep track for each
  // array of which dimension is the batch dimension in it.
  const int output_dim_count = output_shape.DimensionsCount();
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
  const int output_depth = MatchingDim(filter_shape, filter_dim_count - 2,
                                       output_shape, output_dim_count - 1);
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      // Internal accumulation.
      // Initialize accumulator with the bias-value.
      int32 accum = bias_data[out_c];
      // Accumulation loop.
      for (int d = 0; d < accum_depth; ++d) {
        int16 input_val = input_data[b * accum_depth + d] + input_offset;
        int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset;
        accum += filter_val * input_val;
      }
      // Down-scale the final int32 accumulator to the scale used by our
      // (16-bit, typically 3 integer bits) fixed-point format. The quantized
      // multiplier and shift here have been pre-computed offline
      // (e.g. by toco).
      accum =
          MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
      // Saturate, cast to int16, and store to output array.
      accum = std::max(accum, output_activation_min - output_offset);
      accum = std::min(accum, output_activation_max - output_offset);
      accum += output_offset;
      output_data[out_c + output_depth * b] = accum;
    }
  }
}

inline void ShuffledFullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const uint8* input_data, const RuntimeShape& weights_shape,
    const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    int16* output_data, uint8* shuffled_input_workspace_data) {
  const int32 output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

  TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
  // TODO(benoitjacob): This really should be:
  //     const int batches = ArraySize(output_dims, 1);
  // but the current --variable_batch hack consists in overwriting the 3rd
  // dimension with the runtime batch size, as we don't keep track for each
  // array of which dimension is the batch dimension in it.
  const int output_dim_count = output_shape.DimensionsCount();
  const int weights_dim_count = weights_shape.DimensionsCount();
  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
  const int output_depth = MatchingDim(weights_shape, weights_dim_count - 2,
                                       output_shape, output_dim_count - 1);
  const int accum_depth = weights_shape.Dims(weights_dim_count - 1);
  TFLITE_DCHECK((accum_depth % 16) == 0);
  TFLITE_DCHECK((output_depth % 4) == 0);

  // Shuffling and xoring of input activations into the workspace buffer
  uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
  if (batches == 1) {
    for (int i = 0; i < accum_depth; i++) {
      shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
    }
  } else if (batches == 4) {
    for (int c = 0; c < accum_depth; c += 16) {
      for (int b = 0; b < 4; b++) {
        const uint8* src_data_ptr = input_data + b * accum_depth + c;
        for (int j = 0; j < 16; j++) {
          uint8 src_val = *src_data_ptr++;
          // Flip the sign bit, so that the kernel will only need to
          // reinterpret these uint8 values as int8, getting for free the
          // subtraction of the zero_point value 128.
          uint8 dst_val = src_val ^ 0x80;
          *shuffled_input_workspace_ptr++ = dst_val;
        }
      }
    }
  } else {
    TFLITE_DCHECK(false);
    return;
  }

  // Actual computation
  if (batches == 1) {
    int16* output_ptr = output_data;
    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
    // so that just reinterpreting them as int8 values is equivalent to
    // subtracting 128 from them, thus implementing for free the subtraction of
    // the zero_point value 128.
    const int8* shuffled_weights_ptr =
        reinterpret_cast<const int8*>(shuffled_weights_data);
    // Likewise, we preshuffled and pre-xored the input data above.
    const int8* shuffled_input_data =
        reinterpret_cast<const int8*>(shuffled_input_workspace_data);
    for (int c = 0; c < output_depth; c += 4) {
      // Internal accumulation.
      // Initialize accumulator with the bias-value.
      int32 accum[4] = {0};
      // Accumulation loop.
      for (int d = 0; d < accum_depth; d += 16) {
        for (int i = 0; i < 4; i++) {
          for (int j = 0; j < 16; j++) {
            int8 input_val = shuffled_input_data[d + j];
            int8 weights_val = *shuffled_weights_ptr++;
            accum[i] += weights_val * input_val;
          }
        }
      }
      for (int i = 0; i < 4; i++) {
        // Add bias value
        int32 acc = accum[i] + bias_data[c + i];
        // Down-scale the final int32 accumulator to the scale used by our
        // (16-bit, typically 3 integer bits) fixed-point format. The quantized
        // multiplier and shift here have been pre-computed offline
        // (e.g. by toco).
        acc =
            MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
        // Saturate, cast to int16, and store to output array.
        acc = std::max(acc, output_activation_min);
        acc = std::min(acc, output_activation_max);
        output_ptr[c + i] = acc;
      }
    }
  } else if (batches == 4) {
    int16* output_ptr = output_data;
    // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
    // so that just reinterpreting them as int8 values is equivalent to
    // subtracting 128 from them, thus implementing for free the subtraction of
    // the zero_point value 128.
    const int8* shuffled_weights_ptr =
        reinterpret_cast<const int8*>(shuffled_weights_data);
    // Likewise, we preshuffled and pre-xored the input data above.
    const int8* shuffled_input_data =
        reinterpret_cast<const int8*>(shuffled_input_workspace_data);
    for (int c = 0; c < output_depth; c += 4) {
      const int8* shuffled_input_ptr = shuffled_input_data;
      // Accumulation loop.
      // Internal accumulation.
      // Initialize accumulator with the bias-value.
      int32 accum[4][4];
      for (int i = 0; i < 4; i++) {
        for (int b = 0; b < 4; b++) {
          accum[i][b] = 0;
        }
      }
      for (int d = 0; d < accum_depth; d += 16) {
        for (int i = 0; i < 4; i++) {
          for (int b = 0; b < 4; b++) {
            for (int j = 0; j < 16; j++) {
              int8 input_val = shuffled_input_ptr[16 * b + j];
              int8 weights_val = shuffled_weights_ptr[16 * i + j];
              accum[i][b] += weights_val * input_val;
            }
          }
        }
        shuffled_input_ptr += 64;
        shuffled_weights_ptr += 64;
      }
      for (int i = 0; i < 4; i++) {
        for (int b = 0; b < 4; b++) {
          // Add bias value
          int32 acc = accum[i][b] + bias_data[c + i];
          // Down-scale the final int32 accumulator to the scale used by our
          // (16-bit, typically 3 integer bits) fixed-point format. The
          // quantized multiplier and shift here have been pre-computed offline
          // (e.g. by toco).
          acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
                                              output_shift);
          // Saturate, cast to int16, and store to output array.
          acc = std::max(acc, output_activation_min);
          acc = std::min(acc, output_activation_max);
          output_ptr[b * output_depth + c + i] = acc;
        }
      }
    }
  } else {
    TFLITE_DCHECK(false);
    return;
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
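ShuffledFullyConnected above leans on one bit trick that its comments describe: XOR-ing a uint8 activation or weight with 0x80 and then reading the byte back as int8 is the same as subtracting the zero point 128. A small self-contained sketch of that identity (it assumes a two's-complement target, which is what these kernels rely on in practice):

#include <cassert>
#include <cstdint>

// Checks that (v ^ 0x80), reinterpreted as int8_t, equals v - 128 for every
// possible uint8 value.
inline void CheckSignFlipTrick() {
  for (int v = 0; v < 256; ++v) {
    const uint8_t flipped = static_cast<uint8_t>(v) ^ 0x80;
    const int8_t as_int8 = static_cast<int8_t>(flipped);
    assert(static_cast<int>(as_int8) == v - 128);
  }
}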
@@ -0,0 +1,143 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_

#include <limits>

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_integer_ops {

// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.
inline void AddElementwise(int size, const ArithmeticParams& params,
                           const int8_t* input1_data, const int8_t* input2_data,
                           int8_t* output_data) {
  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);

  for (int i = 0; i < size; ++i) {
    const int32 input1_val = params.input1_offset + input1_data[i];
    const int32 input2_val = params.input2_offset + input2_data[i];
    const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
    const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
    const int32 scaled_input1_val =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_input1_val, params.input1_multiplier, params.input1_shift);
    const int32 scaled_input2_val =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            shifted_input2_val, params.input2_multiplier, params.input2_shift);
    const int32 raw_sum = scaled_input1_val + scaled_input2_val;
    const int32 raw_output =
        MultiplyByQuantizedMultiplierSmallerThanOneExp(
            raw_sum, params.output_multiplier, params.output_shift) +
        params.output_offset;
    const int32 clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, raw_output));
    output_data[i] = static_cast<int8_t>(clamped_output);
  }
}

inline void Add(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const int8_t* input1_data,
                const RuntimeShape& input2_shape, const int8_t* input2_data,
                const RuntimeShape& output_shape, int8_t* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);

  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
  AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
                               const RuntimeShape& input1_shape,
                               const int8_t* input1_data,
                               const RuntimeShape& input2_shape,
                               const int8_t* input2_data,
                               const RuntimeShape& output_shape,
                               int8_t* output_data) {
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                      &desc2);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest stride,
  // typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for the
  // best cache behavior.
  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          const int32_t input1_val =
              params.input1_offset +
              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
          const int32_t input2_val =
              params.input2_offset +
              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
          const int32_t shifted_input1_val =
              input1_val * (1 << params.left_shift);
          const int32_t shifted_input2_val =
              input2_val * (1 << params.left_shift);
          const int32_t scaled_input1_val =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  shifted_input1_val, params.input1_multiplier,
                  params.input1_shift);
          const int32_t scaled_input2_val =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  shifted_input2_val, params.input2_multiplier,
                  params.input2_shift);
          const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
          const int32_t raw_output =
              MultiplyByQuantizedMultiplierSmallerThanOneExp(
                  raw_sum, params.output_multiplier, params.output_shift) +
              params.output_offset;
          const int32_t clamped_output =
              std::min(params.quantized_activation_max,
                       std::max(params.quantized_activation_min, raw_output));
          output_data[Offset(extended_output_shape, b, y, x, c)] =
              static_cast<int8_t>(clamped_output);
        }
      }
    }
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
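The int8 Add above stays entirely in fixed point: both operands are shifted left by params.left_shift, rescaled onto a common scale with their per-input multiplier/shift pairs, summed, and then requantized with the output multiplier and offset. Conceptually it approximates the floating-point computation sketched below; the scale and zero-point parameter names here are illustrative stand-ins, not fields of ArithmeticParams.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Conceptual (floating-point) model of the quantized Add: dequantize both
// operands, add in real arithmetic, then requantize and clamp the sum.
inline int8_t AddViaRealArithmetic(int8_t x, float x_scale, int32_t x_zero_point,
                                   int8_t y, float y_scale, int32_t y_zero_point,
                                   float out_scale, int32_t out_zero_point) {
  const float real_sum =
      x_scale * (x - x_zero_point) + y_scale * (y - y_zero_point);
  const int32_t quantized =
      static_cast<int32_t>(std::lround(real_sum / out_scale)) + out_zero_point;
  return static_cast<int8_t>(std::min(127, std::max(-128, quantized)));
}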
@@ -0,0 +1,217 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

// Fixed-point per-channel-quantization convolution reference kernel.
inline void ConvPerChannel(
    const ConvParams& params, const int32* output_multiplier,
    const int32* output_shift, const RuntimeShape& input_shape,
    const int8* input_data, const RuntimeShape& filter_shape,
    const int8* filter_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    int8* output_data) {
  // Get parameters.
  const int32 input_offset = params.input_offset;  // r = s(q - Z)
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int32 output_offset = params.output_offset;

  // Set min and max value of the output.
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;

  // Sanity check.
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }

  // Check dimensions of the tensors.
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          int32 acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height);
                if (is_point_inside_image) {
                  int32 input_val = input_data[Offset(input_shape, batch, in_y,
                                                      in_x, in_channel)];
                  int32 filter_val =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  // Accumulate with 32 bits accumulator.
                  // In the nudging process during model quantization, we force
                  // real value of 0.0 be represented by a quantized value. This
                  // guarantees that the input_offset is a int8, even though it
                  // is represented using int32.
                  // int32 += int8 * (int8 - int8) so the highest value we can
                  // get from each accumulation is [-127, 127] * ([-128, 127] -
                  // [-128, 127]), which is [-32512, 32512]. log2(32512)
                  // = 14.98, which means we can accumulate at least 2^16
                  // multiplications without overflow. The accumulator is
                  // applied to a filter so the accumulation logic will hold as
                  // long as the filter size (filter_y * filter_x * in_channel)
                  // does not exceed 2^16, which is the case in all the models
                  // we have seen so far.
                  // TODO(jianlijianli): Add a check to make sure the
                  // accumulator depth is smaller than 2^16.
                  acc += filter_val * (input_val + input_offset);
                }
              }
            }
          }

          if (bias_data) {
            acc += bias_data[out_channel];
          }
          acc = MultiplyByQuantizedMultiplier(
              acc, output_multiplier[out_channel], output_shift[out_channel]);
          acc += output_offset;
          acc = std::max(acc, output_activation_min);
          acc = std::min(acc, output_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              static_cast<int8_t>(acc);
        }
      }
    }
  }
}

// Fixed-point per-channel-quantization convolution reference kernel.
// 16-bit data and 8-bit filter
inline void ConvPerChannel(
    const ConvParams& params, const int32* output_multiplier,
    const int32* output_shift, const RuntimeShape& input_shape,
    const int16* input_data, const RuntimeShape& filter_shape,
    const int8* filter_data, const RuntimeShape& bias_shape,
    const std::int64_t* bias_data, const RuntimeShape& output_shape,
    int16* output_data) {
  // Get parameters.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;

  // Set min and max value of the output.
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;

  // Sanity check.
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
  if (bias_data) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }

  // Check dimensions of the tensors.
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
          const int in_x_origin = (out_x * stride_width) - pad_width;
          const int in_y_origin = (out_y * stride_height) - pad_height;
          std::int64_t acc = 0;
          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height);
                if (is_point_inside_image) {
                  int32 input_val = input_data[Offset(input_shape, batch, in_y,
                                                      in_x, in_channel)];
                  int32 filter_val =
                      filter_data[Offset(filter_shape, out_channel, filter_y,
                                         filter_x, in_channel)];
                  // Accumulate with 64 bits accumulator.
                  // int64 += int8 * int16 so the highest value we can
                  // get from each accumulation is [-127, 127] * ([-32768,
                  // 32767] -
                  // [-32768, 32767]), which is [-8322945, 8322945].
                  // log2(8322945) = 22.99.
                  acc += filter_val * input_val;
                }
              }
            }
          }
          if (bias_data) {
            acc += bias_data[out_channel];
          }
          int32_t scaled_acc = MultiplyByQuantizedMultiplier(
              acc, output_multiplier[out_channel], output_shift[out_channel]);
          scaled_acc = std::max(scaled_acc, output_activation_min);
          scaled_acc = std::min(scaled_acc, output_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
              static_cast<int16_t>(scaled_acc);
        }
      }
    }
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
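The overflow argument in the ConvPerChannel comments can be checked with plain integer arithmetic: using the per-product bound of 32512 quoted above, 2^16 accumulations still fit in a 32-bit signed accumulator. A compile-time sketch of that bound (the 2^16 figure is the assumed maximum filter volume, exactly as in the comment):

#include <cstdint>

// 32512 is the per-product magnitude bound quoted in the kernel comment;
// 1 << 16 is the assumed upper limit on filter_height * filter_width *
// input_depth.
static_assert(INT64_C(32512) * (INT64_C(1) << 16) <= INT32_MAX,
              "2^16 int8 products fit in an int32 accumulator");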
@@ -0,0 +1,289 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {
inline void DepthwiseConvPerChannel(
    const DepthwiseParams& params, const int32* output_multiplier,
    const int32* output_shift, const RuntimeShape& input_shape,
    const int8* input_data, const RuntimeShape& filter_shape,
    const int8* filter_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    int8* output_data) {
  // Get parameters.
  // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int depth_multiplier = params.depth_multiplier;
  const int32 input_offset = params.input_offset;
  const int32 output_offset = params.output_offset;
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;

  // Check dimensions of the tensors.
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int input_depth = input_shape.Dims(3);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
          for (int m = 0; m < depth_multiplier; ++m) {
            const int output_channel = m + in_channel * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - pad_width;
            const int in_y_origin = (out_y * stride_height) - pad_height;
            int32 acc = 0;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height);
                if (is_point_inside_image) {
                  int32 input_val = input_data[Offset(input_shape, batch, in_y,
                                                      in_x, in_channel)];
                  int32 filter_val = filter_data[Offset(
                      filter_shape, 0, filter_y, filter_x, output_channel)];
                  // Accumulate with 32 bits accumulator.
                  // In the nudging process during model quantization, we force
                  // real value of 0.0 be represented by a quantized value. This
                  // guarantees that the input_offset is a int8, even though it
                  // is represented using int32.
                  // int32 += int8 * (int8 - int8) so the highest value we can
                  // get from each accumulation is [-127, 127] * ([-128, 127] -
                  // [-128, 127]), which is [-32512, 32512]. log2(32512)
                  // = 14.98, which means we can accumulate at least 2^16
                  // multiplications without overflow. The accumulator is
                  // applied to a filter so the accumulation logic will hold as
                  // long as the filter size (filter_y * filter_x * in_channel)
                  // does not exceed 2^16, which is the case in all the models
                  // we have seen so far.
                  // TODO(jianlijianli): Add a check to make sure the
                  // accumulator depth is smaller than 2^16.
                  acc += filter_val * (input_val + input_offset);
                }
              }
            }
            if (bias_data) {
              acc += bias_data[output_channel];
            }
            acc = MultiplyByQuantizedMultiplier(
                acc, output_multiplier[output_channel],
                output_shift[output_channel]);
            acc += output_offset;
            acc = std::max(acc, output_activation_min);
            acc = std::min(acc, output_activation_max);
            output_data[Offset(output_shape, batch, out_y, out_x,
                               output_channel)] = static_cast<int8_t>(acc);
          }
        }
      }
    }
  }
}

inline void DepthwiseConvPerChannel(
    const DepthwiseParams& params, const int32* output_multiplier,
    const int32* output_shift, const RuntimeShape& input_shape,
    const int16* input_data, const RuntimeShape& filter_shape,
    const int8* filter_data, const RuntimeShape& bias_shape,
    const std::int64_t* bias_data, const RuntimeShape& output_shape,
    int16* output_data) {
  // Get parameters.
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int depth_multiplier = params.depth_multiplier;
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;

  // Check dimensions of the tensors.
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int input_depth = input_shape.Dims(3);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
  TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
          for (int m = 0; m < depth_multiplier; ++m) {
            const int output_channel = m + in_channel * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - pad_width;
            const int in_y_origin = (out_y * stride_height) - pad_height;
            std::int64_t acc = 0;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height);
                if (is_point_inside_image) {
                  int32 input_val = input_data[Offset(input_shape, batch, in_y,
                                                      in_x, in_channel)];
                  int32 filter_val = filter_data[Offset(
                      filter_shape, 0, filter_y, filter_x, output_channel)];
                  // Accumulate with 64 bits accumulator.
                  // We assume maximum of 2^16 accumulations as with the 8-bit
                  // case so actually the value in the accumulator should not
                  // exceed 40 bits
                  acc += static_cast<int64_t>(filter_val) *
                         static_cast<int64_t>(input_val);
                }
              }
            }
            if (bias_data) {
              acc += bias_data[output_channel];
            }
            int32 scaled_acc = MultiplyByQuantizedMultiplier(
                acc, output_multiplier[output_channel],
                output_shift[output_channel]);
            scaled_acc = std::max(scaled_acc, output_activation_min);
            scaled_acc = std::min(scaled_acc, output_activation_max);
            output_data[Offset(output_shape, batch, out_y, out_x,
                               output_channel)] =
                static_cast<int16_t>(scaled_acc);
          }
        }
      }
    }
  }
}

inline void DepthwiseConvHybridPerChannel(
    const DepthwiseParams& params, float* scaling_factors_ptr,
    const RuntimeShape& input_shape, const int8* input_data,
    const RuntimeShape& filter_shape, const int8* filter_data,
    const RuntimeShape& bias_shape, const float* bias_data,
    const RuntimeShape& output_shape, float* output_data,
    const float* per_channel_scale, int32_t* input_offset) {
  const int stride_width = params.stride_width;
  const int stride_height = params.stride_height;
  const int dilation_width_factor = params.dilation_width_factor;
  const int dilation_height_factor = params.dilation_height_factor;
  const int pad_width = params.padding_values.width;
  const int pad_height = params.padding_values.height;
  const int depth_multiplier = params.depth_multiplier;
  const float output_activation_min = params.float_activation_min;
  const float output_activation_max = params.float_activation_max;
  // Check dimensions of the tensors.
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int input_depth = input_shape.Dims(3);
  const int filter_height = filter_shape.Dims(1);
  const int filter_width = filter_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int bias_depth = bias_shape.FlatSize();
  TFLITE_DCHECK_EQ(output_depth, input_depth * depth_multiplier);
  TFLITE_DCHECK_EQ(bias_depth, output_depth);

  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
          for (int m = 0; m < depth_multiplier; ++m) {
            const int output_channel = m + in_channel * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - pad_width;
            const int in_y_origin = (out_y * stride_height) - pad_height;
            int32 acc = 0;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y =
                    in_y_origin + dilation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                    (in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                    (in_y < input_height);
                if (is_point_inside_image) {
                  int32 input_val = input_data[Offset(input_shape, batch, in_y,
                                                      in_x, in_channel)];
                  int32 filter_val = filter_data[Offset(
                      filter_shape, 0, filter_y, filter_x, output_channel)];
                  acc += filter_val * (input_val - input_offset[batch]);
                }
              }
            }
            float acc_float = static_cast<float>(acc);
            acc_float *=
                per_channel_scale[output_channel] * scaling_factors_ptr[batch];
            if (bias_data && output_channel < bias_depth) {
              acc_float += bias_data[output_channel];
            }
            output_data[Offset(output_shape, batch, out_y, out_x,
                               output_channel)] =
                ActivationFunctionWithMinMax(acc_float, output_activation_min,
                                             output_activation_max);
          }
        }
      }
    }
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
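All three depthwise variants above index their output channels as output_channel = m + in_channel * depth_multiplier, so the depth_multiplier outputs derived from one input channel end up contiguous. A tiny illustration of that mapping (the input_depth and depth_multiplier values are arbitrary):

#include <cstdio>

// Prints the (in_channel, m) -> output_channel mapping used by the depthwise
// kernels, for input_depth = 2 and depth_multiplier = 3: output channels 0..2
// come from input channel 0 and channels 3..5 from input channel 1.
inline void PrintDepthwiseChannelMapping() {
  const int input_depth = 2;
  const int depth_multiplier = 3;
  for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
    for (int m = 0; m < depth_multiplier; ++m) {
      const int output_channel = m + in_channel * depth_multiplier;
      std::printf("in_channel=%d m=%d -> output_channel=%d\n", in_channel, m,
                  output_channel);
    }
  }
}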
@@ -0,0 +1,108 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

inline void FullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const int8_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const int32* bias_data, const RuntimeShape& output_shape,
    int8_t* output_data) {
  const int32 input_offset = params.input_offset;
  const int32 filter_offset = params.weights_offset;
  const int32 output_offset = params.output_offset;
  const int32 output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int batches = output_shape.Dims(0);
  const int output_depth = output_shape.Dims(1);
  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      int32 acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        int32 input_val = input_data[b * accum_depth + d];
        int32 filter_val = filter_data[out_c * accum_depth + d];
        acc += (filter_val + filter_offset) * (input_val + input_offset);
      }
      if (bias_data) {
        acc += bias_data[out_c];
      }
      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
      acc += output_offset;
      acc = std::max(acc, output_activation_min);
      acc = std::min(acc, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
    }
  }
}

inline void FullyConnected(
    const FullyConnectedParams& params, const RuntimeShape& input_shape,
    const int16_t* input_data, const RuntimeShape& filter_shape,
    const int8_t* filter_data, const RuntimeShape& bias_shape,
    const int64_t* bias_data, const RuntimeShape& output_shape,
    int16_t* output_data) {
  const int32 filter_offset = params.weights_offset;
  const int32 output_multiplier = params.output_multiplier;
  const int output_shift = params.output_shift;
  const int32 output_activation_min = params.quantized_activation_min;
  const int32 output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);

  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
  const int filter_dim_count = filter_shape.DimensionsCount();
  const int batches = output_shape.Dims(0);
  const int output_depth = output_shape.Dims(1);
  TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
  for (int b = 0; b < batches; ++b) {
    for (int out_c = 0; out_c < output_depth; ++out_c) {
      int64_t acc = 0;
      for (int d = 0; d < accum_depth; ++d) {
        int32 input_val = input_data[b * accum_depth + d];
        int32 filter_val = filter_data[out_c * accum_depth + d];
        acc += (filter_val + filter_offset) * input_val;
      }
      if (bias_data) {
        acc += bias_data[out_c];
      }
      int32_t acc_scaled =
          MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
      acc_scaled = std::max(acc_scaled, output_activation_min);
      acc_scaled = std::min(acc_scaled, output_activation_max);
      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
    }
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
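Each output lane of the int8 FullyConnected above is a plain dot product with the stored offsets added back to the raw int8 values, followed by bias addition and requantization. The standalone sketch below reproduces just the accumulation step for a single lane; the final MultiplyByQuantizedMultiplier and clamp stage is deliberately left out.

#include <cstdint>

// Accumulates one output lane the way the int8 FullyConnected inner loop does:
// (filter + filter_offset) * (input + input_offset), summed over the depth,
// plus the bias. Requantization back to int8 is omitted in this sketch.
inline int32_t FullyConnectedLaneAccumulator(const int8_t* input_row,
                                             const int8_t* filter_row,
                                             int accum_depth,
                                             int32_t input_offset,
                                             int32_t filter_offset,
                                             int32_t bias) {
  int32_t acc = bias;
  for (int d = 0; d < accum_depth; ++d) {
    acc += (static_cast<int32_t>(filter_row[d]) + filter_offset) *
           (static_cast<int32_t>(input_row[d]) + input_offset);
  }
  return acc;
}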
@@ -0,0 +1,65 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
                            int32_t depth, const int8* input_data,
                            int8* output_data) {
  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
  // The output scale must be in sync with Prepare().
  // Output is in 1/128 scale so the actual output range is nudged from [-1, 1]
  // to [-1, 127/128].
  static constexpr int32_t kOutputScale = 7;
  for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
    // int32 = (int8 - int8) ^ 2.
    // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
    // safe from overflowing in at least 2^16 steps.
    int32_t acc = 0;
    for (int inner_index = 0; inner_index < depth; ++inner_index) {
      int32_t input =
          input_data[depth * outer_index + inner_index] - input_zero_point;
      acc += input * input;
    }
    int32_t inv_l2norm_multiplier;
    int inv_l2norm_shift;
    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
                                     &inv_l2norm_shift);

    for (int inner_index = 0; inner_index < depth; ++inner_index) {
      int32_t input =
          input_data[depth * outer_index + inner_index] - input_zero_point;

      // Rescale and downcast. Rescale is folded into the division.
      int32_t output_in_q24 = MultiplyByQuantizedMultiplier(
          input, inv_l2norm_multiplier, inv_l2norm_shift + kOutputScale);
      output_in_q24 =
          std::min(static_cast<int32_t>(kMaxInt8),
                   std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
      output_data[depth * outer_index + inner_index] =
          static_cast<int8>(output_in_q24);
    }
  }
}
}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
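Because kOutputScale is 7, the int8 L2Normalization above emits values on a fixed 1/128 scale, i.e. it approximates out_i = clamp(round(128 * x_i / ||x||)) where x_i is the zero-point-corrected input. The float-domain sketch below states that reference behaviour explicitly; the function name and the all-zero guard are mine, and the real kernel uses the inverse-sqrt multiplier/shift pair shown above instead of float math.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Float-domain reference for one inner vector of length `depth`.
inline void L2NormalizationFloatReference(int32_t input_zero_point, int depth,
                                          const int8_t* input_data,
                                          int8_t* output_data) {
  float sum_of_squares = 0.f;
  for (int i = 0; i < depth; ++i) {
    const float x = static_cast<float>(input_data[i] - input_zero_point);
    sum_of_squares += x * x;
  }
  // Guard for an all-zero vector; the fixed-point kernel's behaviour in that
  // case is determined by GetInvSqrtQuantizedMultiplierExp and is not
  // modelled here.
  const float inv_norm =
      sum_of_squares > 0.f ? 1.f / std::sqrt(sum_of_squares) : 0.f;
  for (int i = 0; i < depth; ++i) {
    const float x = static_cast<float>(input_data[i] - input_zero_point);
    const int32_t q = static_cast<int32_t>(std::lround(128.f * x * inv_norm));
    output_data[i] = static_cast<int8_t>(std::min(127, std::max(-128, q)));
  }
}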
@@ -0,0 +1,96 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_

#include <limits>

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
                     int32_t input_multiplier, int32_t input_left_shift,
                     int32_t input_size, const int8_t* input_data,
                     int8_t* output_data) {
  // Integer bits must be in sync with Prepare() function.
  static constexpr int32_t kInputIntegerBits = 4;
  static constexpr int32_t kOutputIntegerBits = 8;
  static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
  static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
  static constexpr int32_t kOutputZeroPoint = -128;

  for (int i = 0; i < input_size; ++i) {
    const int32_t input =
        static_cast<int32_t>(input_data[i]) - input_zero_point;
    if (input <= -input_range_radius) {
      output_data[i] = kMinInt8;
    } else if (input >= input_range_radius) {
      output_data[i] = kMaxInt8;
    } else {
      const int32_t input_in_q4 = MultiplyByQuantizedMultiplier(
          input, input_multiplier, input_left_shift);
      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
      const int32_t output_in_q0 =
          gemmlowp::logistic(FixedPoint4::FromRaw(input_in_q4)).raw();

      // Rescale and downcast.
      using gemmlowp::RoundingDivideByPOT;
      int32_t output_in_q23 =
          RoundingDivideByPOT(output_in_q0, 31 - kOutputIntegerBits);
      output_in_q23 = std::min(std::max(output_in_q23 + kOutputZeroPoint,
                                        static_cast<int32_t>(kMinInt8)),
                               static_cast<int32_t>(kMaxInt8));
      output_data[i] = static_cast<int8_t>(output_in_q23);
    }
  }
}

inline void Logistic(int32_t input_size, const int16_t* ptr_input_data,
                     int16_t* ptr_output_data) {
  // We use the sigmoid LUT and take into account that
  // tanh(x) = 2*sigmoid(2*x) - 1.
  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
    int32_t input_data = *ptr_input_data;

    // Scale by 3/4 to expand the range [-8,8] -> [-10.7,10.7], and
    // do the interpolation on unsigned values.
    uint32_t abs_input_data = 3 * abs(input_data);

    // We divide by 2 to the power of 9, because we need to divide by
    // 2 to the power of 7 for the input conversion, plus 1/4 from the
    // scale above.
    uint8_t uh = abs_input_data >> 9;
    uint32_t ua = sigmoid_table_uint16[uh];
    uint32_t ub = sigmoid_table_uint16[uh + 1];
    uint32_t ut = abs_input_data & 0x1ff;

    // Interpolation is done using the fractional bits.
    uint32_t result = (ua << 9) + ut * (ub - ua);

    result = (input_data >= 0) ? (result + (1 << 9))
                               : ((1 << (16 + 9)) - result + (1 << 9) - 1);

    // Back to 16-bit.
    result >>= 10;

    *ptr_output_data = result;
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
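For orientation only (not part of the committed header): a minimal sketch of driving the 16-bit LUT-based kernel above. The input values and their fixed-point scaling are placeholders chosen for illustration; the real scaling comes from the op's Prepare() logic, and the sigmoid table is defined elsewhere in the library.

#include <cstdint>
// Hypothetical example; assumes the header above is included and the library
// providing sigmoid_table_uint16 is linked in.
void ExampleInt16LutLogistic() {
  constexpr int kSize = 4;
  int16_t input[kSize] = {0, 1024, -1024, 4096};  // placeholder fixed-point values
  int16_t output[kSize] = {};
  tflite::reference_integer_ops::Logistic(kSize, input, output);
}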
@@ -0,0 +1,131 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_

#include "fixedpoint/fixedpoint.h"
#include "ruy/profiler/instrumentation.h"  // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

template <typename T>
inline void MulElementwise(int size, const ArithmeticParams& params,
                           const T* input1_data, const T* input2_data,
                           T* output_data) {
  for (int i = 0; i < size; ++i) {
    const int32 input1_val = params.input1_offset + input1_data[i];
    const int32 input2_val = params.input2_offset + input2_data[i];
    const int32 unclamped_result =
        params.output_offset +
        MultiplyByQuantizedMultiplier(input1_val * input2_val,
                                      params.output_multiplier,
                                      params.output_shift);
    const int32 clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, unclamped_result));
    output_data[i] = static_cast<T>(clamped_output);
  }
}

template <typename T>
inline void Mul(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const T* input1_data,
                const RuntimeShape& input2_shape, const T* input2_data,
                const RuntimeShape& output_shape, T* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  ruy::profiler::ScopeLabel label("Mul/8bit");
  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);

  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}

// Mul with 16 bit inputs and int8_t outputs.
inline void Mul(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const int16* input1_data,
                const RuntimeShape& input2_shape, const int16* input2_data,
                const RuntimeShape& output_shape, int8_t* output_data) {
  ruy::profiler::ScopeLabel label("Mul/Int16Int8");
  int32 output_offset = params.output_offset;
  int32 output_activation_min = params.quantized_activation_min;
  int32 output_activation_max = params.quantized_activation_max;
  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

  const int flat_size =
      MatchingElementsSize(input1_shape, input2_shape, output_shape);

  for (int i = 0; i < flat_size; i++) {
    // F0 uses 0 integer bits, range [-1, 1].
    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;

    F0 unclamped_result =
        F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
    int16 rescaled_result =
        gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
    int16 clamped_result =
        std::min<int16>(output_activation_max - output_offset, rescaled_result);
    clamped_result =
        std::max<int16>(output_activation_min - output_offset, clamped_result);
    output_data[i] = output_offset + clamped_result;
  }
}

template <typename T>
inline void BroadcastMul4DSlow(
    const ArithmeticParams& params, const RuntimeShape& input1_shape,
    const T* input1_data, const RuntimeShape& input2_shape,
    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
  ruy::profiler::ScopeLabel label("BroadcastMul4DSlow");

  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  // The input shapes are extended as part of NdArrayDesc initialization.
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                      &desc2);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);

  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          const int32 input1_val =
              params.input1_offset +
              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
          const int32 input2_val =
              params.input2_offset +
              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
          const int32 unclamped_result =
              params.output_offset +
              MultiplyByQuantizedMultiplier(input1_val * input2_val,
                                            params.output_multiplier,
                                            params.output_shift);
          const int32 clamped_output = std::min(
              params.quantized_activation_max,
              std::max(params.quantized_activation_min, unclamped_result));
          output_data[Offset(extended_output_shape, b, y, x, c)] =
              static_cast<T>(clamped_output);
        }
      }
    }
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite
#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
@@ -0,0 +1,256 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_

#include <limits>

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

inline void AveragePool(const PoolParams& params,
                        const RuntimeShape& input_shape, const int8* input_data,
                        const RuntimeShape& output_shape, int8* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int32 acc = 0;
          int filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              acc +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          // Round to the closest integer value.
          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                        : (acc - filter_count / 2) / filter_count;
          acc = std::max(acc, params.quantized_activation_min);
          acc = std::min(acc, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int8>(acc);
        }
      }
    }
  }
}

inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const int8* input_data, const RuntimeShape& output_shape,
                    int8* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_GE(params.quantized_activation_min,
                   std::numeric_limits<int8_t>::min());
  TFLITE_DCHECK_LE(params.quantized_activation_max,
                   std::numeric_limits<int8_t>::max());
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int8_t max = std::numeric_limits<int8_t>::lowest();
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          max = std::max<int8_t>(max, params.quantized_activation_min);
          max = std::min<int8_t>(max, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int8_t>(max);
        }
      }
    }
  }
}

inline void AveragePool(const PoolParams& params,
                        const RuntimeShape& input_shape,
                        const int16* input_data,
                        const RuntimeShape& output_shape, int16* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int32 acc = 0;
          int filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              acc +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          // Round to the closest integer value.
          acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                        : (acc - filter_count / 2) / filter_count;
          acc = std::max(acc, params.quantized_activation_min);
          acc = std::min(acc, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int16>(acc);
        }
      }
    }
  }
}

inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const int16* input_data, const RuntimeShape& output_shape,
                    int16* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_GE(params.quantized_activation_min,
                   std::numeric_limits<int16_t>::min());
  TFLITE_DCHECK_LE(params.quantized_activation_max,
                   std::numeric_limits<int16_t>::max());
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int16_t max = std::numeric_limits<int16_t>::lowest();
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          max = std::max<int16_t>(max, params.quantized_activation_min);
          max = std::min<int16_t>(max, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<int16_t>(max);
        }
      }
    }
  }
}

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
@@ -0,0 +1,91 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_

#include <algorithm>
#include <cmath>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
                            const RuntimeShape& input_shape,
                            const float* input_data,
                            const RuntimeShape& output_shape,
                            float* output_data, float epsilon = 1e-6) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
  for (int i = 0; i < outer_size; ++i) {
    float squared_l2_norm = 0;
    for (int c = 0; c < depth; ++c) {
      const float val = input_data[depth * i + c];
      squared_l2_norm += val * val;
    }
    float l2_norm = std::sqrt(squared_l2_norm);
    l2_norm = std::max(l2_norm, epsilon);
    for (int c = 0; c < depth; ++c) {
      output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
    }
  }
}

inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
                            const RuntimeShape& input_shape,
                            const uint8* input_data,
                            const RuntimeShape& output_shape,
                            uint8* output_data) {
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int depth =
      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
  const int outer_size =
      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int32 input_zero_point = op_params.input_zero_point;

  for (int i = 0; i < outer_size; ++i) {
    int32 square_l2_norm = 0;
    for (int c = 0; c < depth; c++) {
      int32 diff = input_data[depth * i + c] - input_zero_point;
      square_l2_norm += diff * diff;
    }
    int32 inv_l2norm_multiplier;
    int inv_l2norm_shift;
    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
    for (int c = 0; c < depth; c++) {
      int32 diff = input_data[depth * i + c] - input_zero_point;
      int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
      int32 unclamped_output_val = 128 + rescaled_diff;
      int32 output_val =
          std::min(static_cast<int32>(255),
                   std::max(static_cast<int32>(0), unclamped_output_val));
      output_data[depth * i + c] = static_cast<uint8>(output_val);
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite
#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
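Illustrative sketch (not part of the committed file): normalizing a single 1x4 float vector with the float reference kernel above; the shapes and values are made up for the example, and the surrounding header is assumed to be included.

void ExampleL2Normalization() {
  const float input[4] = {3.f, 4.f, 0.f, 0.f};
  float output[4] = {};  // expected: {0.6f, 0.8f, 0.f, 0.f}
  tflite::L2NormalizationParams op_params = {};  // zero point is unused for float
  const tflite::RuntimeShape shape({1, 4});
  tflite::reference_ops::L2Normalization(op_params, shape, input, shape, output);
}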
@@ -0,0 +1,132 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_

#include <cmath>

#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/op_macros.h"

namespace tflite {
namespace reference_ops {

inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
                     const RuntimeShape& output_shape, float* output_data) {
  const float cutoff_upper = 16.619047164916992188f;
  const float cutoff_lower = -9.f;

  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  // Rationale for using an approximation in the reference kernel:
  // 0. This approximation gives enough precision for float.
  // 1. This works around an issue on an embedded chipset where exp() does not
  //    return correctly as expected - exp(x) should return inf when overflown,
  //    not 1.701417; IEEE 754 defines a representation for inf.
  // 2. This speeds up the calculation and matches the behavior of the
  //    optimized kernels. (check the definition of scalar_logistic_op<float>)

  for (int i = 0; i < flat_size; i++) {
    float val = input_data[i];
    float result;
    if (val > cutoff_upper) {
      result = 1.0f;
    } else if (val < cutoff_lower) {
      result = std::exp(val);
    } else {
      result = 1.f / (1.f + std::exp(-val));
    }
    output_data[i] = result;
  }
}

// Convenience version that allows, for example, generated-code calls to be
// uniform between data types.
inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
                     const float* input_data, const RuntimeShape& output_shape,
                     float* output_data) {
  // Drop params: not needed.
  Logistic(input_shape, input_data, output_shape, output_data);
}

inline void Logistic(const LogisticParams& params,
                     const RuntimeShape& input_shape, const int16* input_data,
                     const RuntimeShape& output_shape, int16* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; i++) {
    // F0 uses 0 integer bits, range [-1, 1].
    // This is the return type of math functions such as tanh, logistic,
    // whose range is in [-1, 1].
    using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
    // F3 uses 3 integer bits, range [-8, 8], the input range expected here.
    using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;

    const F3 input = F3::FromRaw(input_data[i]);
    F0 output = gemmlowp::logistic(input);
    output_data[i] = output.raw();
  }
}

// Quantized int8 logistic activation. Cheats by dequantizing and requantizing
// around the floating point logistic method. This implementation is slow on
// platforms without a floating point unit.

// TODO(b/141211002): Delete this int8 implementation once we can reuse the
// approach used in TFLite for int8 Logistic.
inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
                     float input_scale, int input_zero_point,
                     const RuntimeShape& output_shape, int8_t* output_data,
                     float output_scale, int output_zero_point) {
  const float cutoff_upper = 16.619047164916992188f;
  const float cutoff_lower = -9.f;

  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  // Rationale for using an approximation in the reference kernel:
  // 0. This approximation gives enough precision for float.
  // 1. This works around an issue on an embedded chipset where exp() does not
  //    return correctly as expected - exp(x) should return inf when overflown,
  //    not 1.701417; IEEE 754 defines a representation for inf.
  // 2. This speeds up the calculation and matches the behavior of the
  //    optimized kernels. (check the definition of scalar_logistic_op<float>)

  for (int i = 0; i < flat_size; i++) {
    // Dequantize.
    float val =
        static_cast<float>((input_data[i] - input_zero_point) * input_scale);
    float result;
    if (val > cutoff_upper) {
      result = 1.0f;
    } else if (val < cutoff_lower) {
      result = std::exp(val);
    } else {
      result = 1.f / (1.f + std::exp(-val));
    }
    // Requantize.
    int8_t output =
        static_cast<int8_t>(result / output_scale + output_zero_point);
    output_data[i] = output;
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
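Illustrative sketch (not part of the committed file): applying the float reference Logistic above to a flat buffer of four values; the header above is assumed to be included.

void ExampleFloatLogistic() {
  const float input[4] = {-10.f, -1.f, 0.f, 20.f};
  float output[4] = {};  // sigmoid of each input: ~0, ~0.27, 0.5, 1
  const tflite::RuntimeShape shape({4});
  tflite::reference_ops::Logistic(shape, input, shape, output);
}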
@@ -0,0 +1,64 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

template <typename T, typename Op, int N = 5>
void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
                                 const T* input1_data,
                                 const RuntimeShape& unextended_input2_shape,
                                 const T* input2_data,
                                 const RuntimeShape& unextended_output_shape,
                                 T* output_data, Op op) {
  // Uses element-wise calculation if broadcast is not required.
  if (unextended_input1_shape == unextended_input2_shape) {
    const int flat_size =
        MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
                             unextended_output_shape);
    for (int i = 0; i < flat_size; ++i) {
      output_data[i] = op(input1_data[i], input2_data[i]);
    }
  } else {
    TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
    TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
    TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);

    NdArrayDesc<N> desc1;
    NdArrayDesc<N> desc2;
    NdArrayDesc<N> output_desc;
    NdArrayDescsForElementwiseBroadcast(
        unextended_input1_shape, unextended_input2_shape, &desc1, &desc2);
    CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
                   &output_desc);

    auto maxmin_func = [&](int indexes[N]) {
      output_data[SubscriptToIndex(output_desc, indexes)] =
          op(input1_data[SubscriptToIndex(desc1, indexes)],
             input2_data[SubscriptToIndex(desc2, indexes)]);
    };
    NDOpsHelper<N>(output_desc, maxmin_func);
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
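Illustrative sketch (not part of the committed file): element-wise maximum of two equally shaped int8 tensors, which takes the non-broadcast fast path of the helper above. The header above is assumed to be included.

#include <algorithm>
#include <cstdint>
void ExampleMaximum() {
  const int8_t a[4] = {1, -2, 3, -4};
  const int8_t b[4] = {0, 5, -6, 7};
  int8_t out[4] = {};  // expected: {1, 5, 3, 7}
  const tflite::RuntimeShape shape({1, 1, 1, 4});
  tflite::reference_ops::MaximumMinimumBroadcastSlow(
      shape, a, shape, b, shape, out,
      [](int8_t x, int8_t y) { return std::max(x, y); });
}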
@@ -0,0 +1,166 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {

namespace reference_ops {

// Element-wise mul that can often be used for inner loop of broadcast Mul as
// well as the non-broadcast Mul.
inline void MulElementwise(int size, const ArithmeticParams& params,
                           const uint8* input1_data, const uint8* input2_data,
                           uint8* output_data) {
  for (int i = 0; i < size; ++i) {
    const int32 input1_val = params.input1_offset + input1_data[i];
    const int32 input2_val = params.input2_offset + input2_data[i];
    const int32 unclamped_result =
        params.output_offset +
        MultiplyByQuantizedMultiplier(input1_val * input2_val,
                                      params.output_multiplier,
                                      params.output_shift);
    const int32 clamped_output =
        std::min(params.quantized_activation_max,
                 std::max(params.quantized_activation_min, unclamped_result));
    output_data[i] = static_cast<uint8>(clamped_output);
  }
}

template <typename T>
inline void Mul(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const T* input1_data,
                const RuntimeShape& input2_shape, const T* input2_data,
                const RuntimeShape& output_shape, T* output_data) {
  T output_activation_min;
  T output_activation_max;
  GetActivationParams(params, &output_activation_min, &output_activation_max);

  const int flat_size =
      MatchingFlatSize(input1_shape, input2_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    output_data[i] = ActivationFunctionWithMinMax(
        input1_data[i] * input2_data[i], output_activation_min,
        output_activation_max);
  }
}

inline void Mul(const ArithmeticParams& params,
                const RuntimeShape& input1_shape, const uint8* input1_data,
                const RuntimeShape& input2_shape, const uint8* input2_data,
                const RuntimeShape& output_shape, uint8* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  const int flat_size =
      MatchingFlatSize(input1_shape, input2_shape, output_shape);

  MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void BroadcastMul4DSlow(const ArithmeticParams& params,
                               const RuntimeShape& input1_shape,
                               const uint8* input1_data,
                               const RuntimeShape& input2_shape,
                               const uint8* input2_data,
                               const RuntimeShape& output_shape,
                               uint8* output_data) {
  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
                                      &desc2);
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(4, output_shape);

  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
          const int32 input1_val =
              params.input1_offset +
              input1_data[SubscriptToIndex(desc1, b, y, x, c)];
          const int32 input2_val =
              params.input2_offset +
              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
          const int32 unclamped_result =
              params.output_offset +
              MultiplyByQuantizedMultiplier(input1_val * input2_val,
                                            params.output_multiplier,
                                            params.output_shift);
          const int32 clamped_output = std::min(
              params.quantized_activation_max,
              std::max(params.quantized_activation_min, unclamped_result));
          output_data[Offset(extended_output_shape, b, y, x, c)] =
              static_cast<uint8>(clamped_output);
        }
      }
    }
  }
}

template <typename T>
void BroadcastMul4DSlow(const ArithmeticParams& params,
                        const RuntimeShape& unextended_input1_shape,
                        const T* input1_data,
                        const RuntimeShape& unextended_input2_shape,
                        const T* input2_data,
                        const RuntimeShape& unextended_output_shape,
                        T* output_data) {
  T output_activation_min;
  T output_activation_max;
  GetActivationParams(params, &output_activation_min, &output_activation_max);

  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
                                      unextended_input2_shape, &desc1, &desc2);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest stride,
  // typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for the
  // best cache behavior.
  for (int b = 0; b < output_shape.Dims(0); ++b) {
    for (int y = 0; y < output_shape.Dims(1); ++y) {
      for (int x = 0; x < output_shape.Dims(2); ++x) {
        for (int c = 0; c < output_shape.Dims(3); ++c) {
          output_data[Offset(output_shape, b, y, x, c)] =
              ActivationFunctionWithMinMax(
                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] *
                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
                  output_activation_min, output_activation_max);
        }
      }
    }
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
@@ -0,0 +1,37 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_

#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

template <typename T>
inline void Negate(const RuntimeShape& input_shape, const T* input_data,
                   const RuntimeShape& output_shape, T* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; ++i) {
    output_data[i] = -input_data[i];
  }
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_NEG_H_
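Illustrative sketch (not part of the committed file): negating a small float tensor with the reference kernel above; the header above is assumed to be included.

void ExampleNegate() {
  const float input[3] = {1.5f, -2.f, 0.f};
  float output[3] = {};  // expected: {-1.5f, 2.f, -0.f}
  const tflite::RuntimeShape shape({3});
  tflite::reference_ops::Negate(shape, input, shape, output);
}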
@@ -0,0 +1,184 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_

#include <vector>

#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

namespace reference_ops {

// TFLite Pad supports activation tensors with up to 4 dimensions.
constexpr int PadKernelMaxDimensionCount() { return 4; }

// There are two versions of pad: Pad and PadV2. In PadV2 there is a second
// scalar input that provides the padding value. Therefore pad_value_ptr can be
// equivalent to a simple input1_data. For Pad, it should point to a zero
// value.
//
// Note that two typenames are required, so that T=P=int32 is considered a
// specialization distinct from P=int32.
template <typename T, typename P>
inline void PadImpl(const tflite::PadParams& op_params,
                    const RuntimeShape& input_shape, const T* input_data,
                    const P* pad_value_ptr, const RuntimeShape& output_shape,
                    T* output_data) {
  const RuntimeShape ext_input_shape =
      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), input_shape);
  const RuntimeShape ext_output_shape =
      RuntimeShape::ExtendedShape(PadKernelMaxDimensionCount(), output_shape);
  TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
  TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());

  // Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
  // pad them to 4 dims (yes, we are "padding the padding").
  int left_padding_copy[PadKernelMaxDimensionCount()];
  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
    left_padding_copy[i] = 0;
  }
  for (int i = 0; i < op_params.left_padding_count; ++i) {
    left_padding_copy[i + PadKernelMaxDimensionCount() -
                      op_params.left_padding_count] = op_params.left_padding[i];
  }
  int right_padding_copy[PadKernelMaxDimensionCount()];
  for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
    right_padding_copy[i] = 0;
  }
  for (int i = 0; i < op_params.right_padding_count; ++i) {
    right_padding_copy[i + PadKernelMaxDimensionCount() -
                       op_params.right_padding_count] =
        op_params.right_padding[i];
  }

  const int output_batch = ext_output_shape.Dims(0);
  const int output_height = ext_output_shape.Dims(1);
  const int output_width = ext_output_shape.Dims(2);
  const int output_depth = ext_output_shape.Dims(3);

  const int left_b_padding = left_padding_copy[0];
  const int left_h_padding = left_padding_copy[1];
  const int left_w_padding = left_padding_copy[2];
  const int left_d_padding = left_padding_copy[3];

  const int right_b_padding = right_padding_copy[0];
  const int right_h_padding = right_padding_copy[1];
  const int right_w_padding = right_padding_copy[2];
  const int right_d_padding = right_padding_copy[3];

  const T pad_value = *pad_value_ptr;

  const T* in_ptr = input_data;
  T* out_ptr = output_data;
  for (int out_b = 0; out_b < output_batch; ++out_b) {
    for (int out_h = 0; out_h < output_height; ++out_h) {
      for (int out_w = 0; out_w < output_width; ++out_w) {
        for (int out_d = 0; out_d < output_depth; ++out_d) {
          if (out_b < left_b_padding ||
              out_b >= output_batch - right_b_padding ||
              out_h < left_h_padding ||
              out_h >= output_height - right_h_padding ||
              out_w < left_w_padding ||
              out_w >= output_width - right_w_padding ||
              out_d < left_d_padding ||
              out_d >= output_depth - right_d_padding) {
            *out_ptr++ = pad_value;
          } else {
            *out_ptr++ = *in_ptr++;
          }
        }
      }
    }
  }
}

template <typename T, typename P>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const T* input_data,
                const P* pad_value_ptr, const RuntimeShape& output_shape,
                T* output_data) {
  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
          output_data);
}

// The second (pad-value) input can be int32 when, say, the first is uint8.
template <typename T>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const T* input_data,
                const int32* pad_value_ptr, const RuntimeShape& output_shape,
                T* output_data) {
  const T converted_pad_value = static_cast<T>(*pad_value_ptr);
  PadImpl(op_params, input_shape, input_data, &converted_pad_value,
          output_shape, output_data);
}

// This version avoids conflicting template matching.
template <>
inline void Pad(const tflite::PadParams& op_params,
                const RuntimeShape& input_shape, const int32* input_data,
                const int32* pad_value_ptr, const RuntimeShape& output_shape,
                int32* output_data) {
  PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
          output_data);
}

// One could make all PadImageStyle calls simply delegate the work to the
// ordinary Pad. However, it is better that the reference code asserts false in
// similar cases.
template <typename T, typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape, const T* input_data,
                          const P* pad_value_ptr,
                          const RuntimeShape& output_shape, T* output_data) {
  TFLITE_ASSERT_FALSE;
}

template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape,
                          const uint8* input_data, const P* pad_value_ptr,
                          const RuntimeShape& output_shape,
                          uint8* output_data) {
  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
      output_data);
}

template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape,
                          const int8_t* input_data, const P* pad_value_ptr,
                          const RuntimeShape& output_shape,
                          int8_t* output_data) {
  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
      output_data);
}

template <typename P>
inline void PadImageStyle(const tflite::PadParams& op_params,
                          const RuntimeShape& input_shape,
                          const float* input_data, const P* pad_value_ptr,
                          const RuntimeShape& output_shape,
                          float* output_data) {
  Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
      output_data);
}

}  // namespace reference_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PAD_H_
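Illustrative sketch (not part of the committed file): padding a 1x2x2x1 float tensor with one extra column of zeros on the right, using the per-dimension left/right padding counts that PadImpl reads from PadParams. The exact PadParams field layout is taken from types.h and assumed here; only fields referenced by the kernel above are used.

void ExamplePad() {
  const float input[4] = {1.f, 2.f, 3.f, 4.f};
  float output[6] = {};  // expected: {1, 2, 0, 3, 4, 0}
  const float pad_value = 0.f;
  tflite::PadParams op_params = {};
  op_params.left_padding_count = 4;
  op_params.right_padding_count = 4;
  op_params.right_padding[2] = 1;  // one extra column on the width dimension
  const tflite::RuntimeShape input_shape({1, 2, 2, 1});
  const tflite::RuntimeShape output_shape({1, 2, 3, 1});
  tflite::reference_ops::Pad(op_params, input_shape, input, &pad_value,
                             output_shape, output);
}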
@@ -0,0 +1,296 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_

#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

inline void AveragePool(const PoolParams& params,
                        const RuntimeShape& input_shape,
                        const float* input_data,
                        const RuntimeShape& output_shape, float* output_data) {
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          float total = 0.f;
          float filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              total +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          const float average = total / filter_count;
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              ActivationFunctionWithMinMax(average, params.float_activation_min,
                                           params.float_activation_max);
        }
      }
    }
  }
}

inline void AveragePool(const PoolParams& params,
                        const RuntimeShape& input_shape,
                        const uint8* input_data,
                        const RuntimeShape& output_shape, uint8* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          int32 acc = 0;
          int filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              acc +=
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              filter_count++;
            }
          }
          acc = (acc + filter_count / 2) / filter_count;
          acc = std::max(acc, params.quantized_activation_min);
          acc = std::min(acc, params.quantized_activation_max);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              static_cast<uint8>(acc);
        }
      }
    }
  }
}

inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
                   const float* input_data, const RuntimeShape& output_shape,
                   float* output_data) {
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          float sum_squares = 0.f;
          int filter_count = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              const float val =
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)];
              sum_squares += val * val;
              filter_count++;
            }
          }
          const float l2pool_result = std::sqrt(sum_squares / filter_count);
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              ActivationFunctionWithMinMax(l2pool_result,
                                           params.float_activation_min,
                                           params.float_activation_max);
        }
      }
    }
  }
}

inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const float* input_data, const RuntimeShape& output_shape,
                    float* output_data) {
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          float max = std::numeric_limits<float>::lowest();
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
              ActivationFunctionWithMinMax(max, params.float_activation_min,
                                           params.float_activation_max);
        }
      }
    }
  }
}

inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
                    const uint8* input_data, const RuntimeShape& output_shape,
                    uint8* output_data) {
  TFLITE_DCHECK_LE(params.quantized_activation_min,
                   params.quantized_activation_max);
  TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
  TFLITE_DCHECK_LE(params.quantized_activation_max, 255);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);
  const int stride_height = params.stride_height;
  const int stride_width = params.stride_width;
  for (int batch = 0; batch < batches; ++batch) {
    for (int out_y = 0; out_y < output_height; ++out_y) {
      for (int out_x = 0; out_x < output_width; ++out_x) {
        for (int channel = 0; channel < depth; ++channel) {
          const int in_x_origin =
              (out_x * stride_width) - params.padding_values.width;
          const int in_y_origin =
              (out_y * stride_height) - params.padding_values.height;
          // Compute the boundaries of the filter region clamped so as to
          // ensure that the filter window fits in the input array.
          const int filter_x_start = std::max(0, -in_x_origin);
          const int filter_x_end =
              std::min(params.filter_width, input_width - in_x_origin);
          const int filter_y_start = std::max(0, -in_y_origin);
          const int filter_y_end =
              std::min(params.filter_height, input_height - in_y_origin);
          uint8 max = 0;
          for (int filter_y = filter_y_start; filter_y < filter_y_end;
               ++filter_y) {
            for (int filter_x = filter_x_start; filter_x < filter_x_end;
                 ++filter_x) {
              const int in_x = in_x_origin + filter_x;
              const int in_y = in_y_origin + filter_y;
              max = std::max(
                  max,
                  input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
            }
          }
          max = std::max<uint8>(max, params.quantized_activation_min);
          max = std::min<uint8>(max, params.quantized_activation_max);
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<uint8>(max);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
|
||||
@@ -0,0 +1,76 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
// Broadcast prelu to output_shape for quantized uint8/int8 data.
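// Sketch of the computation below: the input and alpha values are first
// shifted by their respective offsets; non-negative shifted inputs pass
// through unchanged, while negative ones are multiplied by the shifted alpha
// and rescaled with output_multiplier/output_shift. The output offset is then
// added and the result is clamped to the representable range of T.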
|
||||
template <typename T>
|
||||
inline void BroadcastPrelu4DSlow(
|
||||
const PreluParams& params, const RuntimeShape& input_shape,
|
||||
const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data,
|
||||
const RuntimeShape& output_shape, T* output_data) {
|
||||
TFLITE_DCHECK_LE(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(alpha_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
|
||||
const RuntimeShape extended_output_shape =
|
||||
RuntimeShape::ExtendedShape(4, output_shape);
|
||||
NdArrayDesc<4> desc1;
|
||||
NdArrayDesc<4> desc2;
|
||||
NdArrayDescsForElementwiseBroadcast(input_shape, alpha_shape, &desc1, &desc2);
|
||||
|
||||
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
|
||||
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
||||
int output_index = Offset(extended_output_shape, b, y, x, c);
|
||||
int input_index = SubscriptToIndex(desc1, b, y, x, c);
|
||||
const int32 input_value =
|
||||
params.input_offset + input_data[input_index];
|
||||
int32 output_value;
|
||||
if (input_value >= 0) {
|
||||
output_value = input_value;
|
||||
} else {
|
||||
auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
|
||||
const int32 alpha_value =
|
||||
params.alpha_offset + alpha_data[alpha_index];
|
||||
output_value = MultiplyByQuantizedMultiplier(
|
||||
input_value * alpha_value, params.output_multiplier,
|
||||
params.output_shift);
|
||||
}
|
||||
output_value += params.output_offset;
|
||||
|
||||
const int32 quantized_min = std::numeric_limits<T>::min();
|
||||
const int32 quantized_max = std::numeric_limits<T>::max();
|
||||
const int32 clamped_output =
|
||||
std::min(quantized_max, std::max(quantized_min, output_value));
|
||||
output_data[output_index] = static_cast<T>(clamped_output);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
|
||||
@@ -0,0 +1,134 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
// Consolidates dimensions in broadcast inputs, checks for five-fold pattern.
|
||||
//
|
||||
// For example, if the sequence of dimensions of one input is
|
||||
// ..., 1, 3, 1, 7, 9, 5,... and the other is ..., 2, 3, 1, 7, 1, 1, ...
|
||||
// we can consolidate these as
|
||||
// ..., 1, 3*7, 9*5, ... and 2, 3*7, 1.
|
||||
//
|
||||
// The category is updated in the less-frequent case of shapes that are
|
||||
// not suited to a fivefold-loop broadcast.
|
||||
//
|
||||
// Falls back to generic pattern when it does not know how to process properly.
|
||||
//
|
||||
// Returns true iff there is some sort of broadcast, which includes five-fold
|
||||
// patterns and falling back to generic broadcast.
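//
// Worked example (illustrative): for shape0 = [2, 3, 1, 5] and
// shape1 = [1, 3, 4, 5], dimension 2 broadcasts from the first input, so the
// category becomes kFirstInputBroadcastsFast and the fivefold decomposition
// below produces broadcast_shape = {1, 2, 3, 4, 5} (indices 0..4).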
|
||||
inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
|
||||
const RuntimeShape& shape1,
|
||||
tflite::ArithmeticParams* params) {
|
||||
const int dims_count =
|
||||
std::max(shape0.DimensionsCount(), shape1.DimensionsCount());
|
||||
|
||||
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
|
||||
RuntimeShape scalar_shape(dims_count, 1);
|
||||
|
||||
auto extended_shape0 = RuntimeShape::ExtendedShape(dims_count, shape0);
|
||||
auto extended_shape1 = RuntimeShape::ExtendedShape(dims_count, shape1);
|
||||
|
||||
// Check for "exact" match, implicitly accepting any scalar shapes.
|
||||
if (extended_shape0 == extended_shape1) {
|
||||
params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = dims_count - 1; i >= 0; --i) {
|
||||
if (extended_shape0.Dims(i) == extended_shape1.Dims(i)) {
|
||||
continue;
|
||||
} else if (extended_shape0.Dims(i) == 1) {
|
||||
params->broadcast_category =
|
||||
BroadcastableOpCategory::kFirstInputBroadcastsFast;
|
||||
break;
|
||||
} else if (extended_shape1.Dims(i) == 1) {
|
||||
params->broadcast_category =
|
||||
BroadcastableOpCategory::kSecondInputBroadcastsFast;
|
||||
break;
|
||||
} else {
|
||||
// This case is erroneous: there is a dimension that does not match and
|
||||
// is not a broadcast from one shape to the other.
|
||||
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (params->broadcast_category !=
|
||||
BroadcastableOpCategory::kFirstInputBroadcastsFast &&
|
||||
params->broadcast_category !=
|
||||
BroadcastableOpCategory::kSecondInputBroadcastsFast) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// From this point it is assumed contractually that corresponding dimensions
|
||||
// in shape0 and shape1 are either (a) equal or (b) one or other equals 1.
|
||||
const bool swap_inputs = params->broadcast_category ==
|
||||
BroadcastableOpCategory::kSecondInputBroadcastsFast;
|
||||
const RuntimeShape* shape_a =
|
||||
swap_inputs ? &extended_shape1 : &extended_shape0;
|
||||
const RuntimeShape* shape_b =
|
||||
swap_inputs ? &extended_shape0 : &extended_shape1;
|
||||
|
||||
int i = dims_count - 1;
|
||||
params->broadcast_shape[0] = 1;
|
||||
params->broadcast_shape[1] = 1;
|
||||
params->broadcast_shape[2] = 1;
|
||||
params->broadcast_shape[3] = 1;
|
||||
params->broadcast_shape[4] = 1;
|
||||
// y_0 is greedy: include dims if both or neither equal 1: in other words,
|
||||
// test for equality rather than (shape_a->Dims(i) != 1).
|
||||
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
|
||||
params->broadcast_shape[4] *= shape_b->Dims(i);
|
||||
--i;
|
||||
}
|
||||
// Here either input_a or input_b has dim of 1 (if i >= 0). If it is input_b
|
||||
// that has the unit dimension, the next two loops are not entered.
|
||||
while (i >= 0 && shape_a->Dims(i) == 1) {
|
||||
params->broadcast_shape[3] *= shape_b->Dims(i);
|
||||
--i;
|
||||
}
|
||||
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
|
||||
params->broadcast_shape[2] *= shape_a->Dims(i);
|
||||
--i;
|
||||
}
|
||||
// Here either input_a or input_b has dim of 1 (if i >= 0).
|
||||
while (i >= 0 && shape_b->Dims(i) == 1) {
|
||||
params->broadcast_shape[1] *= shape_a->Dims(i);
|
||||
--i;
|
||||
}
|
||||
while (i >= 0 && shape_a->Dims(i) == shape_b->Dims(i)) {
|
||||
params->broadcast_shape[0] *= shape_b->Dims(i);
|
||||
--i;
|
||||
}
|
||||
|
||||
// Rarer case is when the broadcast dimensions cannot be handled by a fivefold
|
||||
// loop.
|
||||
if (i >= 0) {
|
||||
params->broadcast_category = BroadcastableOpCategory::kGenericBroadcast;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
|
||||
@@ -0,0 +1,51 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
template <typename InputT, typename OutputT>
|
||||
inline void AffineQuantize(const tflite::QuantizationParams& op_params,
|
||||
const RuntimeShape& input_shape,
|
||||
const InputT* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
OutputT* output_data) {
|
||||
const int32 zero_point = op_params.zero_point;
|
||||
const double scale = op_params.scale;
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
static constexpr int32 min_val = std::numeric_limits<OutputT>::min();
|
||||
static constexpr int32 max_val = std::numeric_limits<OutputT>::max();
|
||||
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
const InputT val = input_data[i];
|
||||
int32 unclamped =
|
||||
static_cast<int32>(TfLiteRound(val / static_cast<float>(scale))) +
|
||||
zero_point;
|
||||
int32 clamped = std::min(std::max(unclamped, min_val), max_val);
|
||||
output_data[i] = static_cast<OutputT>(clamped);
|
||||
}
|
||||
}
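// Worked example (illustrative): with scale = 0.5 and zero_point = 0,
// quantizing the float value 3.2f to int8 gives round(3.2 / 0.5) + 0 = 6,
// which already lies inside [-128, 127], so the stored value is 6.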
} // namespace reference_ops
|
||||
|
||||
} // namespace tflite
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
|
||||
@@ -0,0 +1,400 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
// A generic reduce method that can be used for reduce_sum, reduce_mean, etc.
|
||||
// This method iterates through input data and reduces elements along the
|
||||
// dimensions given in axis.
|
||||
template <typename In, typename Out>
|
||||
inline bool Reduce(const In* input_data, const int* input_dims,
|
||||
const int* output_dims, const int input_num_dims,
|
||||
const int output_num_dims, const int* axis,
|
||||
const int num_axis, int* input_iter,
|
||||
Out reducer(const Out current, const In in),
|
||||
Out* output_data) {
|
||||
// Reset input iterator.
|
||||
for (int idx = 0; idx < input_num_dims; ++idx) {
|
||||
input_iter[idx] = 0;
|
||||
}
|
||||
// Iterate through input_data.
|
||||
do {
|
||||
size_t input_offset =
|
||||
ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
|
||||
size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
|
||||
input_iter, num_axis, axis);
|
||||
output_data[output_offset] =
|
||||
reducer(output_data[output_offset], input_data[input_offset]);
|
||||
} while (NextIndex(input_num_dims, input_dims, input_iter));
|
||||
return true;
|
||||
}
|
||||
|
||||
// This method parses the input 'axis' to remove duplicates and handle negative
|
||||
// values, and returns a valid 'out_axis'
|
||||
inline bool ResolveAxis(const int num_dims, const int* axis,
|
||||
const int64_t num_axis, int* out_axis,
|
||||
int* out_num_axis) {
|
||||
*out_num_axis = 0; // Just in case.
|
||||
// Short-circuit axis resolution for scalars; the axis will go unused.
|
||||
if (num_dims == 0) {
|
||||
return true;
|
||||
}
|
||||
// O(n^2) is fine since out_num_axis should be really small, mostly <= 4.
|
||||
for (int64_t idx = 0; idx < num_axis; ++idx) {
|
||||
// Handle negative index. A positive index 'p_idx' can be represented as a
// negative index 'n_idx' as: n_idx = p_idx - num_dims.
// e.g. for num_dims=3, [0, 1, 2] is the same as [-3, -2, -1].
|
||||
int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
|
||||
TFLITE_DCHECK(current >= 0 && current < num_dims);
|
||||
bool is_dup = false;
|
||||
for (int j = 0; j < *out_num_axis; ++j) {
|
||||
if (out_axis[j] == current) {
|
||||
is_dup = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!is_dup) {
|
||||
out_axis[*out_num_axis] = current;
|
||||
*out_num_axis += 1;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
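// Example (illustrative): with num_dims = 3 and axis = {-1, 2, 0}, the
// negative index -1 resolves to 2, the duplicate 2 is dropped, and the result
// is out_axis = {2, 0} with *out_num_axis == 2.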
|
||||
|
||||
// This method expects that output_data has been initialized.
|
||||
template <typename In, typename Out>
|
||||
inline bool ReduceSumImpl(const In* input_data, const int* input_dims,
|
||||
const int* output_dims, const int input_num_dims,
|
||||
const int output_num_dims, const int* axis,
|
||||
const int num_axis, int* input_iter,
|
||||
Out* output_data) {
|
||||
auto reducer = [](const Out current, const In in) -> Out {
|
||||
const Out actual_in = static_cast<Out>(in);
|
||||
return current + actual_in;
|
||||
};
|
||||
return Reduce<In, Out>(input_data, input_dims, output_dims, input_num_dims,
|
||||
output_num_dims, axis, num_axis, input_iter, reducer,
|
||||
output_data);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
|
||||
const T init_value, T* data) {
|
||||
size_t num_elements = 1;
|
||||
for (int idx = 0; idx < num_dims; ++idx) {
|
||||
size_t current = static_cast<size_t>(dims[idx]);
|
||||
// Overflow prevention.
|
||||
if (num_elements > std::numeric_limits<size_t>::max() / current) {
|
||||
return false;
|
||||
}
|
||||
num_elements *= current;
|
||||
}
|
||||
for (size_t idx = 0; idx < num_elements; ++idx) {
|
||||
data[idx] = init_value;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Computes the generic value (i.e., sum/max/min/prod) of elements across
|
||||
// dimensions given in axis. It needs to pass in init_value and reducer.
|
||||
template <typename T>
|
||||
inline bool ReduceGeneric(const T* input_data, const int* input_dims,
|
||||
const int input_num_dims, T* output_data,
|
||||
const int* output_dims, const int output_num_dims,
|
||||
const int* axis, const int64_t num_axis_dimensions,
|
||||
bool keep_dims, int* temp_index, int* resolved_axis,
|
||||
T init_value,
|
||||
T reducer(const T current, const T in)) {
|
||||
// Reset output data.
|
||||
if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
|
||||
output_data)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Resolve axis.
|
||||
int num_resolved_axis = 0;
|
||||
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
|
||||
&num_resolved_axis)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return Reduce<T, T>(input_data, input_dims, output_dims, input_num_dims,
|
||||
output_num_dims, resolved_axis, num_resolved_axis,
|
||||
temp_index, reducer, output_data);
|
||||
}
|
||||
|
||||
// Computes the mean of elements across dimensions given in axis.
// It does so in two stages: first it computes the sum of the elements along
// the axis, then it divides that sum by the number of elements in the axis.
|
||||
template <typename T, typename U>
|
||||
inline bool Mean(const T* input_data, const int* input_dims,
|
||||
const int input_num_dims, T* output_data,
|
||||
const int* output_dims, const int output_num_dims,
|
||||
const int* axis, const int num_axis_dimensions, bool keep_dims,
|
||||
int* temp_index, int* resolved_axis, U* temp_sum) {
|
||||
ruy::profiler::ScopeLabel label("Mean");
|
||||
// Reset output data.
|
||||
size_t num_outputs = 1;
|
||||
for (int idx = 0; idx < output_num_dims; ++idx) {
|
||||
size_t current = static_cast<size_t>(output_dims[idx]);
|
||||
// Overflow prevention.
|
||||
if (num_outputs > std::numeric_limits<size_t>::max() / current) {
|
||||
return false;
|
||||
}
|
||||
num_outputs *= current;
|
||||
}
|
||||
for (size_t idx = 0; idx < num_outputs; ++idx) {
|
||||
output_data[idx] = T();
|
||||
temp_sum[idx] = U();
|
||||
}
|
||||
|
||||
// Resolve axis.
|
||||
int num_resolved_axis = 0;
|
||||
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
|
||||
&num_resolved_axis)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
|
||||
output_num_dims, resolved_axis, num_resolved_axis,
|
||||
temp_index, temp_sum)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Calculate the mean by dividing output_data by the number of aggregated
// elements.
|
||||
U num_elements_in_axis = 1;
|
||||
for (int idx = 0; idx < num_resolved_axis; ++idx) {
|
||||
size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
|
||||
// Overflow prevention.
|
||||
if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
|
||||
return false;
|
||||
}
|
||||
num_elements_in_axis *= current;
|
||||
}
|
||||
|
||||
if (num_elements_in_axis > 0) {
|
||||
for (size_t idx = 0; idx < num_outputs; ++idx) {
|
||||
output_data[idx] =
|
||||
static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void Mean(const tflite::MeanParams& op_params,
|
||||
const RuntimeShape& unextended_input_shape,
|
||||
const T* input_data,
|
||||
const RuntimeShape& unextended_output_shape, T* output_data) {
|
||||
ruy::profiler::ScopeLabel label("Mean4D");
|
||||
|
||||
// The current implementation only supports 4-D tensors and simultaneous
// reduction over the width and height dimensions.
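// For example (illustrative), an input of shape [2, 8, 8, 3] reduced over
// axes {1, 2} produces an output of shape [2, 1, 1, 3], where each output
// value is the average over the 8 * 8 = 64 spatial positions.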
|
||||
TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
|
||||
TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
||||
const RuntimeShape input_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_input_shape);
|
||||
const RuntimeShape output_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_output_shape);
|
||||
|
||||
const int output_batch = output_shape.Dims(0);
|
||||
const int output_height = output_shape.Dims(1);
|
||||
const int output_width = output_shape.Dims(2);
|
||||
const int output_depth = output_shape.Dims(3);
|
||||
|
||||
const int input_height = input_shape.Dims(1);
|
||||
const int input_width = input_shape.Dims(2);
|
||||
|
||||
TFLITE_CHECK_EQ(op_params.axis_count, 2);
|
||||
TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
|
||||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
|
||||
TFLITE_CHECK_EQ(output_height, 1);
|
||||
TFLITE_CHECK_EQ(output_width, 1);
|
||||
|
||||
for (int out_b = 0; out_b < output_batch; ++out_b) {
|
||||
for (int out_d = 0; out_d < output_depth; ++out_d) {
|
||||
float value = 0;
|
||||
for (int in_h = 0; in_h < input_height; ++in_h) {
|
||||
for (int in_w = 0; in_w < input_width; ++in_w) {
|
||||
value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
|
||||
}
|
||||
}
|
||||
output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
|
||||
value / (input_width * input_height);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void Mean(const tflite::MeanParams& op_params,
|
||||
const RuntimeShape& unextended_input_shape,
|
||||
const uint8_t* input_data, int32 input_zero_point,
|
||||
float input_scale, const RuntimeShape& unextended_output_shape,
|
||||
uint8_t* output_data, int32 output_zero_point,
|
||||
float output_scale) {
|
||||
ruy::profiler::ScopeLabel label("Mean4D/Uint8");
|
||||
|
||||
// The current implementation only supports 4-D tensors and simultaneous
// reduction over the width and height dimensions.
|
||||
TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
|
||||
TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
||||
const RuntimeShape input_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_input_shape);
|
||||
const RuntimeShape output_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_output_shape);
|
||||
const int output_batch = output_shape.Dims(0);
|
||||
const int output_height = output_shape.Dims(1);
|
||||
const int output_width = output_shape.Dims(2);
|
||||
const int output_depth = output_shape.Dims(3);
|
||||
const int input_height = input_shape.Dims(1);
|
||||
const int input_width = input_shape.Dims(2);
|
||||
const float num_elements_in_axis = input_width * input_height;
|
||||
|
||||
TFLITE_CHECK_EQ(op_params.axis_count, 2);
|
||||
TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
|
||||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
|
||||
TFLITE_CHECK_EQ(output_height, 1);
|
||||
TFLITE_CHECK_EQ(output_width, 1);
|
||||
|
||||
constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
|
||||
constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
|
||||
|
||||
int32 bias =
|
||||
output_zero_point -
|
||||
static_cast<int32>(input_zero_point * input_scale / output_scale);
|
||||
double real_scale =
|
||||
static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
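// Note: real_scale and bias are chosen so that acc * real_scale + bias
// approximates the requantized mean, i.e.
// (sum / N - input_zero_point) * input_scale / output_scale +
// output_zero_point, where N = num_elements_in_axis.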
|
||||
|
||||
int32_t multiplier;
|
||||
int shift;
|
||||
QuantizeMultiplier(real_scale, &multiplier, &shift);
|
||||
for (int out_b = 0; out_b < output_batch; ++out_b) {
|
||||
for (int out_d = 0; out_d < output_depth; ++out_d) {
|
||||
int32 acc = 0;
|
||||
for (int in_h = 0; in_h < input_height; ++in_h) {
|
||||
for (int in_w = 0; in_w < input_width; ++in_w) {
|
||||
acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
|
||||
}
|
||||
}
|
||||
acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
|
||||
acc += bias;
|
||||
acc = std::min(std::max(acc, kMinValue), kMaxValue);
|
||||
output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
|
||||
static_cast<uint8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the mean of elements across dimensions given in axis.
// For quantized values it does so in two stages: first it computes the sum of
// the elements along the axis, then it divides that sum by the number of
// elements in the axis.
|
||||
template <typename T, typename U>
|
||||
inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
|
||||
float input_scale, const int* input_dims,
|
||||
const int input_num_dims, T* output_data,
|
||||
int32 output_zero_point, float output_scale,
|
||||
const int* output_dims,
|
||||
const int output_num_dims, const int* axis,
|
||||
const int num_axis_dimensions, bool keep_dims,
|
||||
int* temp_index, int* resolved_axis, U* temp_sum,
|
||||
bool compute_sum) {
|
||||
const bool uint8_case = std::is_same<T, uint8_t>::value;
|
||||
if (uint8_case) {
|
||||
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
|
||||
} else {
|
||||
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
|
||||
}
|
||||
// Reset output data.
|
||||
size_t num_outputs = 1;
|
||||
for (int idx = 0; idx < output_num_dims; ++idx) {
|
||||
size_t current = static_cast<size_t>(output_dims[idx]);
|
||||
// Overflow prevention.
|
||||
if (num_outputs > std::numeric_limits<size_t>::max() / current) {
|
||||
return false;
|
||||
}
|
||||
num_outputs *= current;
|
||||
}
|
||||
for (size_t idx = 0; idx < num_outputs; ++idx) {
|
||||
output_data[idx] = T();
|
||||
temp_sum[idx] = U();
|
||||
}
|
||||
|
||||
// Resolve axis.
|
||||
int num_resolved_axis = 0;
|
||||
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
|
||||
&num_resolved_axis)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ReduceSumImpl<T, U>(input_data, input_dims, output_dims, input_num_dims,
|
||||
output_num_dims, resolved_axis, num_resolved_axis,
|
||||
temp_index, temp_sum)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Calculate the mean by dividing output_data by the number of aggregated
// elements.
|
||||
U num_elements_in_axis = 1;
|
||||
for (int idx = 0; idx < num_resolved_axis; ++idx) {
|
||||
size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
|
||||
// Overflow prevention.
|
||||
if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
|
||||
return false;
|
||||
}
|
||||
num_elements_in_axis *= current;
|
||||
}
|
||||
|
||||
if (num_elements_in_axis > 0) {
|
||||
const float scale = input_scale / output_scale;
|
||||
if (compute_sum) {
|
||||
// TODO(b/116341117): Eliminate float and do this completely in 8bit.
|
||||
const float bias =
|
||||
-input_zero_point * scale * num_elements_in_axis + 0.5f;
|
||||
for (size_t idx = 0; idx < num_outputs; ++idx) {
|
||||
const U value =
|
||||
static_cast<U>(TfLiteRound(temp_sum[idx] * scale + bias)) +
|
||||
output_zero_point;
|
||||
output_data[idx] = static_cast<T>(value);
|
||||
}
|
||||
} else {
|
||||
const float bias = -input_zero_point * scale + 0.5f;
|
||||
for (size_t idx = 0; idx < num_outputs; ++idx) {
|
||||
float float_mean = static_cast<float>(temp_sum[idx]) /
|
||||
static_cast<float>(num_elements_in_axis);
|
||||
float result =
|
||||
std::min(TfLiteRound(float_mean * scale + bias) + output_zero_point,
|
||||
static_cast<float>(std::numeric_limits<T>::max()));
|
||||
result =
|
||||
std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
|
||||
output_data[idx] = static_cast<T>(result);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
|
||||
@@ -0,0 +1,67 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace reference_ops {
|
||||
|
||||
template <typename input_type, typename output_type>
|
||||
inline void Requantize(const input_type* input_data, int32_t size,
|
||||
int32_t effective_scale_multiplier,
|
||||
int32_t effective_scale_shift, int32_t input_zeropoint,
|
||||
int32_t output_zeropoint, output_type* output_data) {
|
||||
ruy::profiler::ScopeLabel label("Requantize");
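// A fixed-point multiplier of 1 << 30 combined with a left shift of 1
// represents an effective rescaling factor of exactly 1.0
// ((2^30 / 2^31) * 2^1), i.e. the input and output scales match.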
|
||||
const bool same_scale =
|
||||
(effective_scale_multiplier == 1 << 30 && effective_scale_shift == 1);
|
||||
if (same_scale) {
|
||||
const bool mixed_type_int8_uint8 =
|
||||
std::is_same<input_type, int8_t>::value &&
|
||||
std::is_same<output_type, uint8_t>::value;
|
||||
const bool mixed_type_uint8_int8 =
|
||||
std::is_same<input_type, uint8_t>::value &&
|
||||
std::is_same<output_type, int8_t>::value;
|
||||
const int32_t zero_point_diff = input_zeropoint - output_zeropoint;
|
||||
// Fast path to do requantization for the case when just a shift of 128 is
|
||||
// needed.
|
||||
if ((mixed_type_int8_uint8 && zero_point_diff == -128) ||
|
||||
(mixed_type_uint8_int8 && zero_point_diff == 128)) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
output_data[i] = input_data[i] ^ 0x80;
|
||||
}
return;
}
|
||||
}
|
||||
static constexpr int32_t kMinOutput = std::numeric_limits<output_type>::min();
|
||||
static constexpr int32_t kMaxOutput = std::numeric_limits<output_type>::max();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32_t input = input_data[i] - input_zeropoint;
|
||||
const int32_t output =
|
||||
MultiplyByQuantizedMultiplier(input, effective_scale_multiplier,
|
||||
effective_scale_shift) +
|
||||
output_zeropoint;
|
||||
const int32_t clamped_output =
|
||||
std::max(std::min(output, kMaxOutput), kMinOutput);
|
||||
output_data[i] = static_cast<output_type>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
|
||||
@@ -0,0 +1,99 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
inline int32 GetNearestNeighbor(const int input_value, const int32 input_size,
|
||||
const int32 output_size,
|
||||
const bool align_corners,
|
||||
const bool half_pixel_centers) {
|
||||
const float scale =
|
||||
(align_corners && output_size > 1)
|
||||
? (input_size - 1) / static_cast<float>(output_size - 1)
|
||||
: input_size / static_cast<float>(output_size);
|
||||
const float offset = half_pixel_centers ? 0.5f : 0.0f;
|
||||
int32 output_value = std::min(
|
||||
align_corners
|
||||
? static_cast<int32>(std::round((input_value + offset) * scale))
|
||||
: static_cast<int32>(std::floor((input_value + offset) * scale)),
|
||||
input_size - 1);
|
||||
if (half_pixel_centers) {
|
||||
output_value = std::max(static_cast<int32>(0), output_value);
|
||||
}
|
||||
return output_value;
|
||||
}
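// Worked example (illustrative): resizing a width of 4 to a width of 8 with
// align_corners = false and half_pixel_centers = false uses scale = 0.5, so
// output columns 0..7 read input columns floor(x * 0.5) =
// {0, 0, 1, 1, 2, 2, 3, 3}.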
template <typename T>
|
||||
inline void ResizeNearestNeighbor(
|
||||
const tflite::ResizeNearestNeighborParams& op_params,
|
||||
const RuntimeShape& unextended_input_shape, const T* input_data,
|
||||
const RuntimeShape& output_size_shape, const int32* output_size_data,
|
||||
const RuntimeShape& unextended_output_shape, T* output_data) {
|
||||
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
||||
|
||||
const RuntimeShape input_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_input_shape);
|
||||
const RuntimeShape output_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_output_shape);
|
||||
|
||||
int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
int32 input_height = input_shape.Dims(1);
|
||||
int32 input_width = input_shape.Dims(2);
|
||||
int32 depth = MatchingDim(input_shape, 3, output_shape, 3);
|
||||
|
||||
// The TensorFlow version of this op allows resizing only along the width and
// height axes.
|
||||
TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
|
||||
int32 output_height = output_size_data[0];
|
||||
int32 output_width = output_size_data[1];
|
||||
|
||||
const int col_offset = input_shape.Dims(3);
|
||||
const int row_offset = input_shape.Dims(2) * col_offset;
|
||||
const int batch_offset = input_shape.Dims(1) * row_offset;
|
||||
|
||||
const T* input_ptr = input_data;
|
||||
T* output_ptr = output_data;
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int y = 0; y < output_height; ++y) {
|
||||
int32 in_y = GetNearestNeighbor(y, input_height, output_height,
|
||||
op_params.align_corners,
|
||||
op_params.half_pixel_centers);
|
||||
const T* y_input_ptr = input_ptr + in_y * row_offset;
|
||||
for (int x = 0; x < output_width; ++x) {
|
||||
int32 in_x = GetNearestNeighbor(x, input_width, output_width,
|
||||
op_params.align_corners,
|
||||
op_params.half_pixel_centers);
|
||||
const T* x_input_ptr = y_input_ptr + in_x * col_offset;
|
||||
memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
|
||||
output_ptr += depth;
|
||||
}
|
||||
}
|
||||
input_ptr += batch_offset;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
|
||||
@@ -0,0 +1,51 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
inline float RoundToNearest(float value) {
|
||||
auto floor_val = std::floor(value);
|
||||
auto diff = value - floor_val;
|
||||
if ((diff < 0.5f) ||
|
||||
((diff == 0.5f) && (static_cast<int>(floor_val) % 2 == 0))) {
|
||||
return floor_val;
|
||||
} else {
|
||||
return floor_val + 1.0f;
|
||||
}
|
||||
}
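// Banker's rounding examples (illustrative): RoundToNearest(0.5f) == 0.0f,
// RoundToNearest(1.5f) == 2.0f, RoundToNearest(2.5f) == 2.0f and
// RoundToNearest(-0.5f) == 0.0f, i.e. ties go to the nearest even integer.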
inline void Round(const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
// Note that this implementation matches that of TensorFlow's tf.round and
// corresponds to the banker's rounding method.
// cfenv (for fesetround) is not yet supported universally on Android, so we
// use a workaround here.
|
||||
output_data[i] = RoundToNearest(input_data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ROUND_H_
|
||||
@@ -0,0 +1,226 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
|
||||
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace reference_ops {
|
||||
|
||||
inline void Softmax(const SoftmaxParams& params,
|
||||
const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
|
||||
for (int i = 0; i < outer_size; ++i) {
|
||||
// Find max element value which we'll use to ensure numerical stability
|
||||
// taking advantage of the following equality:
|
||||
// exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C))
|
||||
float max = std::numeric_limits<float>::lowest();
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
max = std::max(max, input_data[i * depth + c]);
|
||||
}
|
||||
|
||||
// Compute sum.
|
||||
float sum = 0.f;
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
sum += std::exp((input_data[i * depth + c] - max) *
|
||||
static_cast<float>(params.beta));
|
||||
}
|
||||
|
||||
// Compute result.
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
output_data[i * depth + c] = std::exp((input_data[i * depth + c] - max) *
|
||||
static_cast<float>(params.beta)) /
|
||||
sum;
|
||||
}
|
||||
}
|
||||
}
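// Worked example (illustrative): with beta = 1 and an input row of
// {1.f, 2.f, 3.f}, the row max is 3, the shifted exponentials are
// {e^-2, e^-1, e^0} ~= {0.1353, 0.3679, 1.0}, their sum is ~1.5032, and the
// normalized outputs are approximately {0.0900, 0.2447, 0.6652}.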
// Quantized softmax with int8/uint8 input and int8/uint8/int16 output.
|
||||
template <typename InputT, typename OutputT>
|
||||
inline void Softmax(const SoftmaxParams& params,
|
||||
const RuntimeShape& input_shape, const InputT* input_data,
|
||||
const RuntimeShape& output_shape, OutputT* output_data) {
|
||||
const int32 input_beta_multiplier = params.input_multiplier;
|
||||
const int32 input_beta_left_shift = params.input_left_shift;
|
||||
const int diff_min = params.diff_min;
|
||||
// The representation chosen for the input to the exp() function is Q5.26.
|
||||
// We need to leave extra space since values that we skip might be as large as
|
||||
// -32 before multiplying by input_beta_multiplier, and therefore as large as
|
||||
// -16 afterwards. Note that exp(-8) is definitely not insignificant to
|
||||
// accumulation, but exp(-16) definitely is.
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
static const int kAccumulationIntegerBits = 12;
|
||||
using FixedPointScaledDiff =
|
||||
gemmlowp::FixedPoint<int32, kScaledDiffIntegerBits>;
|
||||
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
|
||||
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
|
||||
for (int i = 0; i < outer_size; ++i) {
|
||||
InputT max_in_row = std::numeric_limits<InputT>::min();
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
max_in_row = std::max(max_in_row, input_data[i * depth + c]);
|
||||
}
|
||||
|
||||
FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
int32 input_diff =
|
||||
static_cast<int32>(input_data[i * depth + c]) - max_in_row;
|
||||
if (input_diff >= diff_min) {
|
||||
const int32 input_diff_rescaled =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
input_diff, input_beta_multiplier, input_beta_left_shift);
|
||||
const FixedPointScaledDiff scaled_diff_f8 =
|
||||
FixedPointScaledDiff::FromRaw(input_diff_rescaled);
|
||||
sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
|
||||
exp_on_negative_values(scaled_diff_f8));
|
||||
}
|
||||
}
|
||||
|
||||
int num_bits_over_unit;
|
||||
FixedPoint0 shifted_scale = FixedPoint0::FromRaw(GetReciprocal(
|
||||
sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit));
|
||||
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
int32 input_diff =
|
||||
static_cast<int32>(input_data[i * depth + c]) - max_in_row;
|
||||
if (input_diff >= diff_min) {
|
||||
const int32 input_diff_rescaled =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
input_diff, input_beta_multiplier, input_beta_left_shift);
|
||||
const FixedPointScaledDiff scaled_diff_f8 =
|
||||
FixedPointScaledDiff::FromRaw(input_diff_rescaled);
|
||||
|
||||
FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
|
||||
int32 unsat_output = gemmlowp::RoundingDivideByPOT(
|
||||
(shifted_scale * exp_in_0).raw(),
|
||||
num_bits_over_unit + 31 - (sizeof(OutputT) * 8));
|
||||
|
||||
const int32 shifted_output =
|
||||
unsat_output +
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::min());
|
||||
|
||||
output_data[i * depth + c] = static_cast<OutputT>(std::max(
|
||||
std::min(shifted_output,
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::max())),
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::min())));
|
||||
} else {
|
||||
output_data[i * depth + c] = std::numeric_limits<OutputT>::min();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Quantized softmax with int16 input and int16 output.
|
||||
inline void SoftmaxInt16(const SoftmaxParams& params,
|
||||
const RuntimeShape& input_shape,
|
||||
const int16_t* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
|
||||
for (int i = 0; i < outer_size; ++i) {
|
||||
// Find the largest element
|
||||
int16_t max_in_row = std::numeric_limits<int16_t>::min();
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
max_in_row = std::max(max_in_row, input_data[i * depth + c]);
|
||||
}
|
||||
|
||||
// Compute exp(input - max_input)
|
||||
std::vector<int16_t> exp_result_Q015(depth);
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
int32_t input_diff = input_data[i * depth + c] - max_in_row;
|
||||
// scale the input_diff such that [-65535, 0] correspond to [-10.0, 0.0]
|
||||
int32_t scaled_diff = MultiplyByQuantizedMultiplier(
|
||||
input_diff, params.input_multiplier, params.input_left_shift);
|
||||
// recenter to [-32768, 32767]
|
||||
int32_t sym_scaled_diff = scaled_diff + 32767;
|
||||
int16_t sat_sym_scaled_diff =
|
||||
std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
|
||||
static_cast<int32_t>(32767));
|
||||
// apply the exp() LUT activation function
|
||||
exp_result_Q015[c] =
|
||||
generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
|
||||
}
|
||||
|
||||
// sum_of_exps is a Q16.15 fixed point format.
|
||||
int32_t sum_of_exps = 0;
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
// Q16.15 + Q0.15
|
||||
sum_of_exps += exp_result_Q015[c];
|
||||
}
|
||||
|
||||
// Compute the reciprocal 1/sum_of_exps
|
||||
uint8_t headroom_plus_one =
|
||||
CountLeadingZeros(static_cast<uint32_t>(sum_of_exps));
|
||||
int32_t shifted_sum =
|
||||
((static_cast<int64_t>(sum_of_exps) << (headroom_plus_one - 1)) +
|
||||
(1 << 13)) >>
|
||||
14;
|
||||
// since the LUT computes 1/(1 + x) we need to first compute x = (sum - 1).
|
||||
// also, the LUT expects a symmetrical input, so we must also recenter x
|
||||
// from [0, 65535] to [-32768, 32767].
|
||||
int32_t sym_shifted_sum = shifted_sum + (-((1 << 15) + (1 << 16)));
|
||||
int16_t sat_sym_shifted_sum = static_cast<int16_t>(
|
||||
std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
|
||||
static_cast<int32_t>(32767)));
|
||||
// apply 1/(1 + x) LUT activation function
|
||||
int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
|
||||
sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
|
||||
|
||||
// Rescale the exp_result with reciprocal
|
||||
// range of output is [0, 32767] correspond to [0.0, 1.0]
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
uint8_t right_shift = 31 - headroom_plus_one;
|
||||
int64_t round = 1 << (right_shift - 1);
|
||||
int32_t result = (static_cast<int64_t>(exp_result_Q015[c]) *
|
||||
static_cast<int64_t>(reciprocal_scale_Q015) +
|
||||
round) >>
|
||||
right_shift;
|
||||
output_data[i * depth + c] = static_cast<int16_t>(
|
||||
std::min(std::max(result, static_cast<int32_t>(0)),
|
||||
static_cast<int32_t>(32767)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
|
||||
@@ -0,0 +1,92 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
template <typename T>
|
||||
inline void StridedSlice(const tflite::StridedSliceParams& op_params,
|
||||
const RuntimeShape& unextended_input_shape,
|
||||
const T* input_data,
|
||||
const RuntimeShape& unextended_output_shape,
|
||||
T* output_data) {
|
||||
using strided_slice::LoopCondition;
|
||||
using strided_slice::StartForAxis;
|
||||
using strided_slice::StopForAxis;
|
||||
// Note that the output_shape is not used herein.
|
||||
tflite::StridedSliceParams params_copy = op_params;
|
||||
|
||||
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 5);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 5);
|
||||
const RuntimeShape input_shape =
|
||||
RuntimeShape::ExtendedShape(5, unextended_input_shape);
|
||||
const RuntimeShape output_shape =
|
||||
RuntimeShape::ExtendedShape(5, unextended_output_shape);
|
||||
|
||||
// Reverse and pad to 5 dimensions because that is what the runtime code
// requires (i.e. all shapes must be 5D and are given backwards).
|
||||
strided_slice::StridedSlicePadIndices(¶ms_copy, 5);
|
||||
|
||||
const int start_0 = StartForAxis(params_copy, input_shape, 0);
|
||||
const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0);
|
||||
const int start_1 = StartForAxis(params_copy, input_shape, 1);
|
||||
const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1);
|
||||
const int start_2 = StartForAxis(params_copy, input_shape, 2);
|
||||
const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2);
|
||||
const int start_3 = StartForAxis(params_copy, input_shape, 3);
|
||||
const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3);
|
||||
const int start_4 = StartForAxis(params_copy, input_shape, 4);
|
||||
const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4);
|
||||
|
||||
T* out_ptr = output_data;
|
||||
for (int offset_0 = start_0 * input_shape.Dims(1),
|
||||
end_0 = stop_0 * input_shape.Dims(1),
|
||||
step_0 = params_copy.strides[0] * input_shape.Dims(1);
|
||||
!LoopCondition(offset_0, end_0, params_copy.strides[0]);
|
||||
offset_0 += step_0) {
|
||||
for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2),
|
||||
end_1 = (offset_0 + stop_1) * input_shape.Dims(2),
|
||||
step_1 = params_copy.strides[1] * input_shape.Dims(2);
|
||||
!LoopCondition(offset_1, end_1, params_copy.strides[1]);
|
||||
offset_1 += step_1) {
|
||||
for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3),
|
||||
end_2 = (offset_1 + stop_2) * input_shape.Dims(3),
|
||||
step_2 = params_copy.strides[2] * input_shape.Dims(3);
|
||||
!LoopCondition(offset_2, end_2, params_copy.strides[2]);
|
||||
offset_2 += step_2) {
|
||||
for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4),
|
||||
end_3 = (offset_2 + stop_3) * input_shape.Dims(4),
|
||||
step_3 = params_copy.strides[3] * input_shape.Dims(4);
|
||||
!LoopCondition(offset_3, end_3, params_copy.strides[3]);
|
||||
offset_3 += step_3) {
|
||||
for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4;
|
||||
!LoopCondition(offset_4, end_4, params_copy.strides[4]);
|
||||
offset_4 += params_copy.strides[4]) {
|
||||
*out_ptr++ = input_data[offset_4];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
|
||||
@@ -0,0 +1,468 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
inline void SubNonBroadcast(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const float* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const float* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
float* output_data) {
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] - input2_data[i], params.float_activation_min,
|
||||
params.float_activation_max);
|
||||
}
|
||||
}
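// Usage sketch for the float overload above. The 4-element shape, the data
// values, and the "no clamping" activation limits are illustrative
// assumptions, not requirements of the API:
//
//   ArithmeticParams params;
//   params.float_activation_min = std::numeric_limits<float>::lowest();
//   params.float_activation_max = std::numeric_limits<float>::max();
//   const RuntimeShape shape({1, 1, 1, 4});
//   const float a[] = {1.f, 2.f, 3.f, 4.f};
//   const float b[] = {0.5f, 0.5f, 0.5f, 0.5f};
//   float out[4];
//   SubNonBroadcast(params, shape, a, shape, b, shape, out);
//   // out is now {0.5f, 1.5f, 2.5f, 3.5f}.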
|
||||
|
||||
inline void SubNonBroadcast(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] - input2_data[i], params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(b/151345304): We can implement BroadcastSub on buffers of arbitrary
|
||||
// dimensionality if the runtime code does a single loop over one dimension
|
||||
// that handles broadcasting as the base case. The code generator would then
|
||||
// generate max(D1, D2) nested for loops.
|
||||
// TODO(b/151345101): BroadcastSub is intentionally duplicated from
|
||||
// reference_ops.h. Once an optimized version is implemented and NdArrayDesc<T>
|
||||
// is no longer referenced in this file, move NdArrayDesc<T> from types.h to
|
||||
// reference_ops.h.
|
||||
template <int N = 5>
|
||||
inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const float* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const float* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
float* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/float");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
&desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
||||
// typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
//
|
||||
// We name our variables by their Tensorflow convention, but generate C code
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
ActivationFunctionWithMinMax(
|
||||
input1_data[SubscriptToIndex(desc1, indexes)] -
|
||||
input2_data[SubscriptToIndex(desc2, indexes)],
|
||||
params.float_activation_min, params.float_activation_max);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
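// Broadcasting sketch for the float BroadcastSubSlow above: subtracting a
// per-channel vector from an NHWC activation tensor. Shapes and names are
// illustrative assumptions:
//
//   const RuntimeShape act_shape({1, 2, 2, 4});  // input1: activations
//   const RuntimeShape vec_shape({1, 1, 1, 4});  // input2: one value per channel
//   BroadcastSubSlow(params, act_shape, act_data, vec_shape, vec_data,
//                    act_shape, out_data);
//   // Each output element is act[b, y, x, c] - vec[c], clamped to the float
//   // activation range in params.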
|
||||
|
||||
template <int N = 5>
|
||||
inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const uint8* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const uint8* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
&desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
||||
// typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
//
|
||||
// We name our variables by their Tensorflow convention, but generate C code
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
const int32 input1_val =
|
||||
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
|
||||
const int32 input2_val =
|
||||
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
static_cast<uint8>(clamped_output);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
|
||||
template <int N = 5>
|
||||
inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
&desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
||||
// typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
//
|
||||
// We name our variables by their Tensorflow convention, but generate C code
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
ActivationFunctionWithMinMax(
|
||||
input1_data[SubscriptToIndex(desc1, indexes)] -
|
||||
input2_data[SubscriptToIndex(desc2, indexes)],
|
||||
params.quantized_activation_min, params.quantized_activation_max);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
|
||||
template <int N = 5>
|
||||
inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int8_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int8_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8");
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
&desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
||||
// typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
//
|
||||
// We name our variables by their Tensorflow convention, but generate C code
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
const int32_t input1_val =
|
||||
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
|
||||
const int32_t input2_val =
|
||||
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
static_cast<int8_t>(clamped_output);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
|
||||
template <typename T, int N = 5>
|
||||
void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const T* input1_data,
|
||||
const RuntimeShape& input2_shape, const T* input2_data,
|
||||
const RuntimeShape& output_shape, T* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/templated");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
&desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
||||
// typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
//
|
||||
// We name our variables by their Tensorflow convention, but generate C code
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
ActivationFunctionWithMinMax(
|
||||
input1_data[SubscriptToIndex(desc1, indexes)] -
|
||||
input2_data[SubscriptToIndex(desc2, indexes)],
|
||||
params.quantized_activation_min, params.quantized_activation_max);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
|
||||
// Element-wise Sub that can often be used for inner loop of broadcast sub as
|
||||
// well as the non-broadcast sub.
|
||||
inline void SubElementwise(int size, const ArithmeticParams& params,
|
||||
const uint8* input1_data, const uint8* input2_data,
|
||||
uint8* output_data) {
|
||||
TFLITE_DCHECK_GT(params.input1_offset, -256);
|
||||
TFLITE_DCHECK_GT(params.input2_offset, -256);
|
||||
TFLITE_DCHECK_LT(params.input1_offset, 256);
|
||||
TFLITE_DCHECK_LT(params.input2_offset, 256);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<uint8>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
// Element-wise Sub that can often be used for inner loop of broadcast sub as
|
||||
// well as the non-broadcast sub.
|
||||
inline void SubElementwise(int size, const ArithmeticParams& params,
|
||||
const int8_t* input1_data, const int8_t* input2_data,
|
||||
int8_t* output_data) {
|
||||
const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
|
||||
TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<int8_t>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
inline void Sub(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const uint8* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8* input2_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
|
||||
TFLITE_DCHECK_GT(params.input1_offset, -256);
|
||||
TFLITE_DCHECK_GT(params.input2_offset, -256);
|
||||
TFLITE_DCHECK_LT(params.input1_offset, 256);
|
||||
TFLITE_DCHECK_LT(params.input2_offset, 256);
|
||||
SubElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
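// Sketch of how the quantization fields consumed above are typically filled
// in by the op's Prepare step. The negated zero points and the left_shift of
// 20 are common choices in the 8-bit kernels, not something mandated by this
// header:
//
//   ArithmeticParams params;
//   params.input1_offset = -input1_zero_point;  // offsets are negated zero points
//   params.input2_offset = -input2_zero_point;
//   params.output_offset = output_zero_point;
//   params.left_shift = 20;
//   // The input1/input2/output multiplier+shift pairs come from
//   // QuantizeMultiplier() applied to the respective scales, so the whole
//   // subtraction can run in 32-bit integer arithmetic.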
|
||||
|
||||
inline void Sub(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const int8_t* input2_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
|
||||
const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
|
||||
TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
|
||||
SubElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
|
||||
const T* input1_data, const RuntimeShape& input2_shape,
|
||||
const T* input2_data, const RuntimeShape& output_shape,
|
||||
T* output_data) {
|
||||
NdArrayDesc<4> desc1;
|
||||
NdArrayDesc<4> desc2;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
&desc2);
|
||||
const RuntimeShape extended_output_shape =
|
||||
RuntimeShape::ExtendedShape(4, output_shape);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
||||
// typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
//
|
||||
// We name our variables by their Tensorflow convention, but generate C code
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
|
||||
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
||||
output_data[Offset(extended_output_shape, b, y, x, c)] =
|
||||
input1_data[SubscriptToIndex(desc1, b, y, x, c)] -
|
||||
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void SubWithActivation(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
ruy::profiler::ScopeLabel label("SubWithActivation");
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] - input2_data[i], params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
}
|
||||
}
|
||||
|
||||
inline void SubWithActivation(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const float* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const float* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
float* output_data) {
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] - input2_data[i], params.float_activation_min,
|
||||
params.float_activation_max);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
|
||||
@@ -0,0 +1,204 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
|
||||
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace strided_slice {
|
||||
|
||||
// Use until std::clamp() is available from C++17.
|
||||
inline int Clamp(const int v, const int lo, const int hi) {
|
||||
TFLITE_DCHECK(!(hi < lo));
|
||||
if (hi < v) return hi;
|
||||
if (v < lo) return lo;
|
||||
return v;
|
||||
}
|
||||
|
||||
inline void StridedSlicePadIndices(tflite::StridedSliceParams* p,
|
||||
int dim_count) {
|
||||
// Add indices and mask bits to fully include extra dimensions
|
||||
TFLITE_CHECK_LE(dim_count, 5);
|
||||
TFLITE_CHECK_GE(dim_count, p->start_indices_count);
|
||||
TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count);
|
||||
TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count);
|
||||
|
||||
const int pad_count = dim_count - p->start_indices_count;
|
||||
|
||||
// Pad indices at start, so move arrays by pad_count.
|
||||
for (int i = p->start_indices_count - 1; i >= 0; --i) {
|
||||
p->strides[i + pad_count] = p->strides[i];
|
||||
p->start_indices[i + pad_count] = p->start_indices[i];
|
||||
p->stop_indices[i + pad_count] = p->stop_indices[i];
|
||||
}
|
||||
for (int i = 0; i < pad_count; ++i) {
|
||||
p->start_indices[i] = 0;
|
||||
p->stop_indices[i] = 1;
|
||||
p->strides[i] = 1;
|
||||
}
|
||||
|
||||
// Pad masks with 0s or 1s as required.
|
||||
p->shrink_axis_mask <<= pad_count;
|
||||
p->ellipsis_mask <<= pad_count;
|
||||
p->new_axis_mask <<= pad_count;
|
||||
p->begin_mask <<= pad_count;
|
||||
p->end_mask <<= pad_count;
|
||||
p->begin_mask |= (1 << pad_count) - 1;
|
||||
p->end_mask |= (1 << pad_count) - 1;
|
||||
|
||||
p->start_indices_count = dim_count;
|
||||
p->stop_indices_count = dim_count;
|
||||
p->strides_count = dim_count;
|
||||
}
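// Worked example for the padding above, with dim_count == 5 and a 2-D slice
// (values are illustrative):
//
//   before: start = {1, 0}, stop = {3, 4}, strides = {1, 2}, begin_mask = 0b10
//   after : start = {0, 0, 0, 1, 0}, stop = {1, 1, 1, 3, 4},
//           strides = {1, 1, 1, 1, 2}, begin_mask = 0b10111
//
// The three padded leading axes get the trivial range [0, 1) with stride 1,
// and their begin/end mask bits are set so they are always fully included.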
|
||||
|
||||
// Return the index for the first element along that axis. This index will be a
|
||||
// positive integer in the range [0, axis_size - 1] that can be used to index
|
||||
// directly into the data.
|
||||
inline int StartForAxis(const tflite::StridedSliceParams& params,
|
||||
const RuntimeShape& input_shape, int axis) {
|
||||
const auto begin_mask = params.begin_mask;
|
||||
const auto* start_indices = params.start_indices;
|
||||
const auto* strides = params.strides;
|
||||
const int axis_size = input_shape.Dims(axis);
|
||||
if (axis_size == 0) {
|
||||
return 0;
|
||||
}
|
||||
// Begin with the specified index.
|
||||
int start = start_indices[axis];
|
||||
|
||||
// begin_mask override
|
||||
if (begin_mask & 1 << axis) {
|
||||
if (strides[axis] > 0) {
|
||||
// Forward iteration - use the first element. These values will get
|
||||
// clamped below (Note: We could have set them to 0 and axis_size-1, but
|
||||
// use lowest() and max() to maintain symmetry with StopForAxis())
|
||||
start = std::numeric_limits<int>::lowest();
|
||||
} else {
|
||||
// Backward iteration - use the last element.
|
||||
start = std::numeric_limits<int>::max();
|
||||
}
|
||||
}
|
||||
|
||||
// Handle negative indices
|
||||
if (start < 0) {
|
||||
start += axis_size;
|
||||
}
|
||||
|
||||
// Clamping
|
||||
start = Clamp(start, 0, axis_size - 1);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// Return the "real" index for the end of iteration along that axis. This is an
|
||||
// "end" in the traditional C sense, in that it points to one past the last
|
||||
// element. For example, when iterating through all elements of a 1D array of
|
||||
// size 4, this function would return 4 as the stop, because it is one past the
|
||||
// "real" indices of 0, 1, 2 & 3.
|
||||
inline int StopForAxis(const tflite::StridedSliceParams& params,
|
||||
const RuntimeShape& input_shape, int axis,
|
||||
int start_for_axis) {
|
||||
const auto end_mask = params.end_mask;
|
||||
const auto shrink_axis_mask = params.shrink_axis_mask;
|
||||
const auto* stop_indices = params.stop_indices;
|
||||
const auto* strides = params.strides;
|
||||
const int axis_size = input_shape.Dims(axis);
|
||||
if (axis_size == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Begin with the specified index
|
||||
const bool shrink_axis = shrink_axis_mask & (1 << axis);
|
||||
int stop = stop_indices[axis];
|
||||
|
||||
// When shrinking an axis, the end position does not matter (and can be
|
||||
// incorrect when negative indexing is used, see Issue #19260). Always use
|
||||
// start_for_axis + 1 to generate a length 1 slice, since start_for_axis has
|
||||
// already been adjusted for negative indices.
|
||||
if (shrink_axis) {
|
||||
stop = start_for_axis + 1;
|
||||
}
|
||||
|
||||
// end_mask override
|
||||
if (end_mask & (1 << axis)) {
|
||||
if (strides[axis] > 0) {
|
||||
// Forward iteration - use the last element. These values will get
|
||||
// clamped below
|
||||
stop = std::numeric_limits<int>::max();
|
||||
} else {
|
||||
// Backward iteration - use the first element.
|
||||
stop = std::numeric_limits<int>::lowest();
|
||||
}
|
||||
}
|
||||
|
||||
// Handle negative indices
|
||||
if (stop < 0) {
|
||||
stop += axis_size;
|
||||
}
|
||||
|
||||
// Clamping
|
||||
// Because the end index points one past the last element, we need slightly
|
||||
// different clamping ranges depending on the direction.
|
||||
if (strides[axis] > 0) {
|
||||
// Forward iteration
|
||||
stop = Clamp(stop, 0, axis_size);
|
||||
} else {
|
||||
// Backward iteration
|
||||
stop = Clamp(stop, -1, axis_size - 1);
|
||||
}
|
||||
|
||||
return stop;
|
||||
}
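// Worked example for StartForAxis/StopForAxis (axis_size == 4, stride == +1,
// shrink_axis not set; the index values are illustrative):
//
//   start_indices[axis] == -1, begin_mask bit clear -> start = -1 + 4 = 3
//   end_mask bit set                                -> stop = Clamp(INT_MAX, 0, 4) = 4
//
// so the slice visits only index 3, i.e. the last element of that axis.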
|
||||
|
||||
inline bool LoopCondition(int index, int stop, int stride) {
|
||||
// True when the index has passed the end of an axis, meaning iteration
// along that axis should stop.
|
||||
return stride > 0 ? index >= stop : index <= stop;
|
||||
}
|
||||
|
||||
inline tflite::StridedSliceParams BuildStridedSliceParams(
|
||||
int begin_mask, int end_mask, int shrink_axis_mask,
|
||||
const std::vector<int>& start_indices, const std::vector<int>& stop_indices,
|
||||
const std::vector<int>& strides) {
|
||||
tflite::StridedSliceParams op_params;
|
||||
const int dims_count = start_indices.size();
|
||||
|
||||
op_params.start_indices_count = dims_count;
|
||||
op_params.stop_indices_count = dims_count;
|
||||
op_params.strides_count = dims_count;
|
||||
for (int i = 0; i < dims_count; ++i) {
|
||||
op_params.start_indices[i] = start_indices[i];
|
||||
op_params.stop_indices[i] = stop_indices[i];
|
||||
op_params.strides[i] = strides[i];
|
||||
}
|
||||
|
||||
op_params.begin_mask = begin_mask;
|
||||
op_params.ellipsis_mask = 0;
|
||||
op_params.end_mask = end_mask;
|
||||
op_params.new_axis_mask = 0;
|
||||
op_params.shrink_axis_mask = shrink_axis_mask;
|
||||
|
||||
return op_params;
|
||||
}
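// Usage sketch (a hypothetical 2-D slice that keeps rows 1..2 and every
// column; masks use one bit per axis):
//
//   const auto op_params = BuildStridedSliceParams(
//       /*begin_mask=*/0b10, /*end_mask=*/0b10, /*shrink_axis_mask=*/0,
//       /*start_indices=*/{1, 0}, /*stop_indices=*/{3, 0}, /*strides=*/{1, 1});
//
// Axis 1 has its begin/end mask bits set, so its start/stop entries are
// ignored and the full column range is taken.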
|
||||
|
||||
} // namespace strided_slice
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_STRIDED_SLICE_LOGIC_H_
|
||||
144
code/lib/tfmicro/tensorflow/lite/kernels/internal/tensor.h
Normal file
@@ -0,0 +1,144 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
|
||||
|
||||
#include <complex>
#include <cstring>  // memcpy (used by SequentialTensorWriter::WriteN)
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/string_util.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
inline RuntimeShape GetTensorShape(std::vector<int32_t> data) {
|
||||
return RuntimeShape(data.size(), data.data());
|
||||
}
|
||||
|
||||
// A list of tensors in a format that can be used by kernels like split and
|
||||
// concatenation.
|
||||
template <typename T>
|
||||
class VectorOfTensors {
|
||||
public:
|
||||
// Build with the tensors in 'tensor_list'.
|
||||
VectorOfTensors(const TfLiteContext& context,
|
||||
const TfLiteIntArray& tensor_list) {
|
||||
int num_tensors = tensor_list.size;
|
||||
|
||||
all_data_.reserve(num_tensors);
|
||||
all_shape_.reserve(num_tensors);
|
||||
all_shape_ptr_.reserve(num_tensors);
|
||||
|
||||
for (int i = 0; i < num_tensors; ++i) {
|
||||
TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
|
||||
all_data_.push_back(GetTensorData<T>(t));
|
||||
all_shape_.push_back(GetTensorShape(t));
|
||||
}
|
||||
|
||||
// Taking the pointer from inside a std::vector is only OK if the vector is
|
||||
// never modified, so we populate all_shape in the previous loop and then we
|
||||
// are free to grab iterators here.
|
||||
for (int i = 0; i < num_tensors; ++i) {
|
||||
all_shape_ptr_.push_back(&all_shape_[i]);
|
||||
}
|
||||
}
|
||||
// Return a pointer to the data pointers of all tensors in the list. For
|
||||
// example:
|
||||
// float* const* f = v.data();
|
||||
// f[0][1] is the second element of the first tensor.
|
||||
T* const* data() const { return all_data_.data(); }
|
||||
|
||||
// Return a pointer to the shape pointers of all tensors in the list. For
|
||||
// example:
|
||||
// const RuntimeShape* const* d = v.shapes();
|
||||
// d[1] points to the dimensions of the second tensor in the list.
|
||||
const RuntimeShape* const* shapes() const { return all_shape_ptr_.data(); }
|
||||
|
||||
private:
|
||||
std::vector<T*> all_data_;
|
||||
std::vector<RuntimeShape> all_shape_;
|
||||
std::vector<RuntimeShape*> all_shape_ptr_;
|
||||
};
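// Usage sketch inside a kernel's Eval function (context and node are the
// usual TfLiteContext* / TfLiteNode* arguments; names are illustrative):
//
//   VectorOfTensors<float> all_inputs(*context, *node->inputs);
//   const float* first_data = all_inputs.data()[0];
//   const RuntimeShape* first_shape = all_inputs.shapes()[0];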
|
||||
|
||||
// A list of quantized tensors in a format that can be used by kernels like
|
||||
// split and concatenation.
|
||||
class VectorOfQuantizedTensors : public VectorOfTensors<uint8> {
|
||||
public:
|
||||
// Build with the tensors in 'tensor_list'.
|
||||
VectorOfQuantizedTensors(const TfLiteContext& context,
|
||||
const TfLiteIntArray& tensor_list)
|
||||
: VectorOfTensors<uint8>(context, tensor_list) {
|
||||
for (int i = 0; i < tensor_list.size; ++i) {
|
||||
TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
|
||||
zero_point_.push_back(t->params.zero_point);
|
||||
scale_.push_back(t->params.scale);
|
||||
}
|
||||
}
|
||||
|
||||
const float* scale() const { return scale_.data(); }
|
||||
const int32* zero_point() const { return zero_point_.data(); }
|
||||
|
||||
private:
|
||||
std::vector<int32> zero_point_;
|
||||
std::vector<float> scale_;
|
||||
};
|
||||
|
||||
// Writes randomly accessed values from `input` sequentially into `output`.
|
||||
template <typename T>
|
||||
class SequentialTensorWriter {
|
||||
public:
|
||||
SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
input_data_ = GetTensorData<T>(input);
|
||||
output_ptr_ = GetTensorData<T>(output);
|
||||
}
|
||||
SequentialTensorWriter(const T* input_data, T* output_data)
|
||||
: input_data_(input_data), output_ptr_(output_data) {}
|
||||
|
||||
void Write(int position) { *output_ptr_++ = input_data_[position]; }
|
||||
void WriteN(int position, int len) {
|
||||
memcpy(output_ptr_, &input_data_[position], sizeof(T) * len);
|
||||
output_ptr_ += len;
|
||||
}
|
||||
|
||||
private:
|
||||
const T* input_data_;
|
||||
T* output_ptr_;
|
||||
};
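// Usage sketch (positions are illustrative): copy a contiguous block and then
// a single gathered element from `input` into `output`.
//
//   SequentialTensorWriter<float> writer(input, output);
//   writer.WriteN(/*position=*/4, /*len=*/3);  // elements 4, 5, 6
//   writer.Write(/*position=*/10);             // then element 10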
|
||||
|
||||
template <>
|
||||
class SequentialTensorWriter<string> {
|
||||
public:
|
||||
SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output)
|
||||
: input_(input), output_(output) {}
|
||||
~SequentialTensorWriter() { buffer_.WriteToTensor(output_, nullptr); }
|
||||
|
||||
void Write(int position) { this->WriteN(position, 1); }
|
||||
void WriteN(int position, int len) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
buffer_.AddString(GetString(input_, position + i));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const TfLiteTensor* input_;
|
||||
TfLiteTensor* output_;
|
||||
DynamicBuffer buffer_;
|
||||
};
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
|
||||
@@ -0,0 +1,47 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
template <typename T>
|
||||
inline T* GetTensorData(TfLiteTensor* tensor) {
|
||||
return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline const T* GetTensorData(const TfLiteTensor* tensor) {
|
||||
return tensor != nullptr ? reinterpret_cast<const T*>(tensor->data.raw)
|
||||
: nullptr;
|
||||
}
|
||||
|
||||
inline RuntimeShape GetTensorShape(const TfLiteTensor* tensor) {
|
||||
if (tensor == nullptr) {
|
||||
return RuntimeShape();
|
||||
}
|
||||
|
||||
TfLiteIntArray* dims = tensor->dims;
|
||||
const int dims_size = dims->size;
|
||||
const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
|
||||
return RuntimeShape(dims_size, dims_data);
|
||||
}
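// Typical kernel-side usage sketch (the tensor pointers come from
// GetInput()/GetOutput() in kernel_util.h; names are illustrative):
//
//   const float* in = GetTensorData<float>(input);
//   float* out = GetTensorData<float>(output);
//   const RuntimeShape in_shape = GetTensorShape(input);
//   const int flat_size = in_shape.FlatSize();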
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
|
||||
1127
code/lib/tfmicro/tensorflow/lite/kernels/internal/types.h
Normal file
File diff suppressed because it is too large
261
code/lib/tfmicro/tensorflow/lite/kernels/kernel_util.cc
Normal file
@@ -0,0 +1,261 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// Per-axis
|
||||
TfLiteStatus PopulateConvolutionQuantizationParams(
|
||||
TfLiteContext* context, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
|
||||
int32_t* output_activation_min, int32_t* output_activation_max,
|
||||
int32_t* per_channel_multiplier, int* per_channel_shift) {
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
|
||||
return PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, activation, multiplier, shift,
|
||||
output_activation_min, output_activation_max, per_channel_multiplier,
|
||||
per_channel_shift, affine_quantization->scale->size);
|
||||
}
|
||||
|
||||
// Per-axis & per-tensor
|
||||
TfLiteStatus PopulateConvolutionQuantizationParams(
|
||||
TfLiteContext* context, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
|
||||
int32_t* output_activation_min, int32_t* output_activation_max,
|
||||
int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels) {
|
||||
TF_LITE_ENSURE_EQ(context, input->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
|
||||
kTfLiteAffineQuantization);
|
||||
// TODO(jianlijianli): Enable bias type check and bias scale == input scale
|
||||
// * filter scale for each channel in affine quantization once bias
|
||||
// quantization is properly populated.
|
||||
// TF_LITE_ENSURE_EQ(context, bias->quantization.type,
|
||||
// kTfLiteAffineQuantization);
|
||||
|
||||
// Check data type.
|
||||
const auto* affine_quantization =
|
||||
reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
|
||||
TF_LITE_ENSURE(context, affine_quantization);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
const bool is_per_channel = affine_quantization->scale->size > 1;
|
||||
if (is_per_channel) {
|
||||
// Currently only Int8/Int16 is supported for per channel quantization.
|
||||
TF_LITE_ENSURE(context,
|
||||
input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
|
||||
TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels);
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context, num_channels,
|
||||
filter->dims->data[affine_quantization->quantized_dimension]);
|
||||
}
|
||||
|
||||
// Populate multiplier and shift using affine quantization.
|
||||
const float input_scale = input->params.scale;
|
||||
const float output_scale = output->params.scale;
|
||||
const float* filter_scales = affine_quantization->scale->data;
|
||||
for (int i = 0; i < num_channels; ++i) {
|
||||
// If per-tensor quantization parameter is specified, broadcast it along the
|
||||
// quantization dimension (channels_out).
|
||||
const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
|
||||
const double filter_scale = static_cast<double>(scale);
|
||||
const double effective_output_scale = static_cast<double>(input_scale) *
|
||||
filter_scale /
|
||||
static_cast<double>(output_scale);
|
||||
int32_t significand;
|
||||
int channel_shift;
|
||||
QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
|
||||
per_channel_multiplier[i] = significand;
|
||||
per_channel_shift[i] = channel_shift;
|
||||
}
|
||||
|
||||
// Populate scalar quantization parameters.
|
||||
// This check on legacy quantization parameters is kept only for backward
|
||||
// compatibility.
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
// Check bias scale == input scale * filter scale.
|
||||
double real_multiplier = 0.0;
|
||||
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
|
||||
context, input, filter, bias, output, &real_multiplier));
|
||||
int exponent;
|
||||
|
||||
// Populate quantization parameters with multiplier and shift.
|
||||
QuantizeMultiplier(real_multiplier, multiplier, &exponent);
|
||||
*shift = -exponent;
|
||||
}
|
||||
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
|
||||
input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, activation, output, output_activation_min,
|
||||
output_activation_max));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
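// Worked example for the per-channel loop above (numbers are illustrative):
// with input_scale = 0.5, filter scale = 0.25 and output_scale = 0.125, the
// effective_output_scale is 0.5 * 0.25 / 0.125 = 1.0. QuantizeMultiplier()
// then yields a Q31 significand of roughly 1 << 30 (i.e. 0.5) together with a
// left shift of 1, so the runtime multiply-and-shift reproduces a scale of 1.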
|
||||
|
||||
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias,
|
||||
TfLiteTensor* output,
|
||||
double* multiplier) {
|
||||
const double input_product_scale = static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(filter->params.scale);
|
||||
// TODO(ahentz): The following conditions must be guaranteed by the training
|
||||
// pipeline.
|
||||
if (bias) {
|
||||
const double bias_scale = static_cast<double>(bias->params.scale);
|
||||
// Here we're making sure the input_product_scale and bias_scale are the same.
|
||||
// Normally this should be guaranteed by the training pipeline, we are
|
||||
// setting the threshold to be 2e-6 to allow some numeric stability
|
||||
// difference.
|
||||
TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <= 2e-6);
|
||||
}
|
||||
return GetQuantizedConvolutionMultipler(context, input, filter, output,
|
||||
multiplier);
|
||||
}
|
||||
|
||||
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
TfLiteTensor* output,
|
||||
double* multiplier) {
|
||||
const double input_product_scale =
|
||||
static_cast<double>(input->params.scale * filter->params.scale);
|
||||
TF_LITE_ENSURE(context, input_product_scale >= 0);
|
||||
*multiplier = input_product_scale / static_cast<double>(output->params.scale);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
namespace {
|
||||
void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
|
||||
int32_t qmin, int32_t qmax,
|
||||
TfLiteTensor* output,
|
||||
int32_t* act_min, int32_t* act_max) {
|
||||
const auto scale = output->params.scale;
|
||||
const auto zero_point = output->params.zero_point;
|
||||
|
||||
auto quantize = [scale, zero_point](float f) {
|
||||
return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
|
||||
};
|
||||
|
||||
if (activation == kTfLiteActRelu) {
|
||||
*act_min = std::max(qmin, quantize(0.0));
|
||||
*act_max = qmax;
|
||||
} else if (activation == kTfLiteActRelu6) {
|
||||
*act_min = std::max(qmin, quantize(0.0));
|
||||
*act_max = std::min(qmax, quantize(6.0));
|
||||
} else if (activation == kTfLiteActRelu1) {
|
||||
*act_min = std::max(qmin, quantize(-1.0));
|
||||
*act_max = std::min(qmax, quantize(1.0));
|
||||
} else {
|
||||
*act_min = qmin;
|
||||
*act_max = qmax;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
|
||||
TfLiteFusedActivation activation,
|
||||
TfLiteTensor* output,
|
||||
int32_t* act_min,
|
||||
int32_t* act_max) {
|
||||
int32_t qmin = 0;
|
||||
int32_t qmax = 0;
|
||||
if (output->type == kTfLiteUInt8) {
|
||||
qmin = std::numeric_limits<uint8_t>::min();
|
||||
qmax = std::numeric_limits<uint8_t>::max();
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
qmin = std::numeric_limits<int8_t>::min();
|
||||
qmax = std::numeric_limits<int8_t>::max();
|
||||
} else if (output->type == kTfLiteInt16) {
|
||||
qmin = std::numeric_limits<int16_t>::min();
|
||||
qmax = std::numeric_limits<int16_t>::max();
|
||||
} else {
|
||||
TF_LITE_ENSURE(context, false);
|
||||
}
|
||||
|
||||
CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
|
||||
act_max);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
|
||||
return TfLiteIntArrayEqual(input1->dims, input2->dims);
|
||||
}
|
||||
|
||||
// TODO(petewarden): Having macros around this is ugly, look at other strategies
|
||||
// before replicating this approach elsewhere.
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteIntArray** output_shape) {
|
||||
int dims1 = NumDimensions(input1);
|
||||
int dims2 = NumDimensions(input2);
|
||||
int out_dims = std::max(dims1, dims2);
|
||||
if (NumElements(input1) == 0) {
|
||||
*output_shape = TfLiteIntArrayCopy(input1->dims);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
|
||||
TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
|
||||
for (int i = 0; i < out_dims; ++i) {
|
||||
int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
|
||||
int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
|
||||
TF_LITE_ENSURE(context, d1 == d2 || d1 == 1 || d2 == 1);
|
||||
shape->data[out_dims - i - 1] = std::max(d1, d2);
|
||||
}
|
||||
*output_shape = shape.release();
|
||||
return kTfLiteOk;
|
||||
}
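// Usage sketch (shapes are illustrative): dims {2, 1, 4} and {3, 1} broadcast
// to {2, 3, 4}, right-aligned as in NumPy.
//
//   TfLiteIntArray* output_shape = nullptr;
//   TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast(
//                                  context, input1, input2, &output_shape));
//   // The caller owns output_shape, e.g. it can be handed to the context's
//   // ResizeTensor callback, which takes ownership.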
|
||||
|
||||
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
const TfLiteTensor* input3,
|
||||
TfLiteIntArray** output_shape) {
|
||||
int dims1 = NumDimensions(input1);
|
||||
int dims2 = NumDimensions(input2);
|
||||
int dims3 = NumDimensions(input3);
|
||||
int out_dims = std::max(std::max(dims1, dims2), dims3);
|
||||
std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
|
||||
TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
|
||||
for (int i = 0; i < out_dims; ++i) {
|
||||
int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
|
||||
int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
|
||||
int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
|
||||
int max_value = std::max(std::max(d1, d2), d3);
|
||||
TF_LITE_ENSURE(context, d1 == 1 || d1 == max_value);
|
||||
TF_LITE_ENSURE(context, d2 == 1 || d2 == max_value);
|
||||
TF_LITE_ENSURE(context, d3 == 1 || d3 == max_value);
|
||||
shape->data[out_dims - i - 1] = max_value;
|
||||
}
|
||||
*output_shape = shape.release();
|
||||
return kTfLiteOk;
|
||||
}
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
} // namespace tflite
|
||||
193
code/lib/tfmicro/tensorflow/lite/kernels/kernel_util.h
Normal file
@@ -0,0 +1,193 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h"
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; }
|
||||
inline int SizeOfDimension(const TfLiteTensor* t, int dim) {
|
||||
return t->dims->data[dim];
|
||||
}
|
||||
inline const TfLiteTensor* GetInput(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
}
|
||||
// Note: You must check if result is not null:
|
||||
// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
inline TfLiteTensor* GetVariableInput(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
TfLiteTensor* tensor =
|
||||
&context->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
return (tensor->is_variable) ? tensor : nullptr;
|
||||
}
|
||||
inline TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->outputs->data[index])];
|
||||
}
|
||||
inline TfLiteTensor* GetTemporary(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context->tensors[flatbuffers::EndianScalar(
|
||||
node->temporaries->data[index])];
|
||||
}
|
||||
inline const TfLiteTensor* GetIntermediates(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context->tensors[node->intermediates->data[index]];
|
||||
}
|
||||
inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; }
|
||||
inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; }
|
||||
inline int NumIntermediates(const TfLiteNode* node) {
|
||||
return node->intermediates->size;
|
||||
}
|
||||
|
||||
inline int64_t NumElements(const TfLiteIntArray* dims) {
|
||||
int64_t count = 1;
|
||||
for (int i = 0; i < dims->size; ++i) {
|
||||
count *= dims->data[i];
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
inline int64_t NumElements(const TfLiteTensor* t) {
|
||||
return NumElements(t->dims);
|
||||
}
|
||||
|
||||
inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
int index) {
|
||||
const bool use_tensor = index < node->inputs->size &&
|
||||
node->inputs->data[index] != kTfLiteOptionalTensor;
|
||||
if (use_tensor) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Determines whether tensor is constant.
|
||||
inline bool IsConstantTensor(const TfLiteTensor* tensor) {
|
||||
return tensor->allocation_type == kTfLiteMmapRo;
|
||||
}
|
||||
|
||||
// Determines whether tensor is dynamic. Note that a tensor can be non-const and
|
||||
// not dynamic. This function specifically checks for a dynamic tensor.
|
||||
inline bool IsDynamicTensor(const TfLiteTensor* tensor) {
|
||||
return tensor->allocation_type == kTfLiteDynamic;
|
||||
}
|
||||
|
||||
// Sets tensor to dynamic.
|
||||
inline void SetTensorToDynamic(TfLiteTensor* tensor) {
|
||||
if (tensor->allocation_type != kTfLiteDynamic) {
|
||||
tensor->allocation_type = kTfLiteDynamic;
|
||||
tensor->data.raw = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Determines whether it is a hybrid op - one that has float inputs and
|
||||
// quantized weights.
|
||||
inline bool IsHybridOp(const TfLiteTensor* input, const TfLiteTensor* weight) {
|
||||
return ((weight->type == kTfLiteUInt8 || weight->type == kTfLiteInt8) &&
|
||||
input->type == kTfLiteFloat32);
|
||||
}
|
||||
|
||||
// Check dimensionality match and populate OpData for Conv and DepthwiseConv.
|
||||
TfLiteStatus PopulateConvolutionQuantizationParams(
|
||||
TfLiteContext* context, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
|
||||
int32_t* output_activation_min, int32_t* output_activation_max,
|
||||
int32_t* per_channel_multiplier, int* per_channel_shift);
|
||||
|
||||
TfLiteStatus PopulateConvolutionQuantizationParams(
|
||||
TfLiteContext* context, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
|
||||
int32_t* output_activation_min, int32_t* output_activation_max,
|
||||
int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels);
|
||||
|
||||
// Calculates the multiplication factor for a quantized convolution (or
|
||||
// quantized depthwise convolution) involving the given tensors. Returns an
|
||||
// error if the scales of the tensors are not compatible.
|
||||
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias,
|
||||
TfLiteTensor* output,
|
||||
double* multiplier);
|
||||
|
||||
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
TfLiteTensor* output,
|
||||
double* multiplier);
|
||||
|
||||
// Calculates the useful quantized range of an activation layer given its
|
||||
// activation tensor.
|
||||
TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
|
||||
TfLiteFusedActivation activation,
|
||||
TfLiteTensor* output,
|
||||
int32_t* act_min,
|
||||
int32_t* act_max);
|
||||
|
||||
// Calculates the useful range of an activation layer given its activation
|
||||
// tensor.
|
||||
template <typename T>
|
||||
void CalculateActivationRange(TfLiteFusedActivation activation,
|
||||
T* activation_min, T* activation_max) {
|
||||
if (activation == kTfLiteActRelu) {
|
||||
*activation_min = 0;
|
||||
*activation_max = std::numeric_limits<T>::max();
|
||||
} else if (activation == kTfLiteActRelu6) {
|
||||
*activation_min = 0;
|
||||
*activation_max = 6;
|
||||
} else if (activation == kTfLiteActRelu1) {
|
||||
*activation_min = -1;
|
||||
*activation_max = 1;
|
||||
} else {
|
||||
*activation_min = std::numeric_limits<T>::lowest();
|
||||
*activation_max = std::numeric_limits<T>::max();
|
||||
}
|
||||
}
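// Usage sketch for the template above:
//
//   float act_min, act_max;
//   CalculateActivationRange(kTfLiteActRelu6, &act_min, &act_max);
//   // act_min == 0.0f, act_max == 6.0f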
|
||||
|
||||
// Return true if the given tensors have the same shape.
|
||||
bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2);
|
||||
|
||||
// Calculates the output_shape that is necessary for element-wise operations
|
||||
// with broadcasting involving the two input tensors.
|
||||
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteIntArray** output_shape);
|
||||
|
||||
// Calculates the output_shape that is necessary for element-wise operations
|
||||
// with broadcasting involving the three input tensors.
|
||||
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
const TfLiteTensor* input3,
|
||||
TfLiteIntArray** output_shape);
|
||||
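// Illustrative example (not part of the upstream header): under the
// TFLite/NumPy broadcasting rules these helpers implement, input1 of shape
// [2, 1, 3] and input2 of shape [1, 5, 3] broadcast to an output_shape of
// [2, 5, 3]; shapes that cannot be broadcast (e.g. [2, 3] vs. [4, 3]) are
// expected to produce an error status instead.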
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
|
||||
75
code/lib/tfmicro/tensorflow/lite/kernels/op_macros.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
|
||||
|
||||
// If we're on a platform without standard IO functions, fall back to a
|
||||
// non-portable function.
|
||||
#ifdef TF_LITE_MCU_DEBUG_LOG
|
||||
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
|
||||
#define DEBUG_LOG(x) \
|
||||
do { \
|
||||
DebugLog(x); \
|
||||
} while (0)
|
||||
|
||||
inline void InfiniteLoop() {
|
||||
DEBUG_LOG("HALTED\n");
|
||||
while (1) {
|
||||
}
|
||||
}
|
||||
|
||||
#define TFLITE_ABORT InfiniteLoop();
|
||||
|
||||
#else // TF_LITE_MCU_DEBUG_LOG
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#define DEBUG_LOG(x) \
|
||||
do { \
|
||||
fprintf(stderr, "%s", (x)); \
|
||||
} while (0)
|
||||
|
||||
#define TFLITE_ABORT abort()
|
||||
|
||||
#endif // TF_LITE_MCU_DEBUG_LOG
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define TFLITE_ASSERT_FALSE (static_cast<void>(0))
|
||||
#else
|
||||
#define TFLITE_ASSERT_FALSE TFLITE_ABORT
|
||||
#endif
|
||||
|
||||
#define TF_LITE_FATAL(msg) \
|
||||
do { \
|
||||
DEBUG_LOG(msg); \
|
||||
DEBUG_LOG("\nFATAL\n"); \
|
||||
TFLITE_ABORT; \
|
||||
} while (0)
|
||||
|
||||
#define TF_LITE_ASSERT(x) \
|
||||
do { \
|
||||
if (!(x)) TF_LITE_FATAL(#x); \
|
||||
} while (0)
|
||||
|
||||
#define TF_LITE_ASSERT_EQ(x, y) \
|
||||
do { \
|
||||
if ((x) != (y)) TF_LITE_FATAL(#x " didn't equal " #y); \
|
||||
} while (0)
|
||||
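// Illustrative usage (not part of the upstream header; the variable names are
// placeholders):
//
//   TF_LITE_ASSERT(buffer != nullptr);
//   TF_LITE_ASSERT_EQ(bytes_read, bytes_expected);
//
// With TF_LITE_MCU_DEBUG_LOG defined, a failed check logs the expression via
// DebugLog() and then spins in InfiniteLoop(); on hosted builds it prints to
// stderr and calls abort().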
|
||||
#endif // TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
|
||||
80
code/lib/tfmicro/tensorflow/lite/kernels/padding.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_PADDING_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_PADDING_H_
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// TODO(renjieliu): Migrate others to use ComputePaddingWithLeftover.
|
||||
inline int ComputePadding(int stride, int dilation_rate, int in_size,
|
||||
int filter_size, int out_size) {
|
||||
int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
|
||||
int padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
|
||||
return padding > 0 ? padding : 0;
|
||||
}
|
||||
|
||||
// It's not guaranteed that padding is symmetric. It's important to keep
|
||||
// the offset for algorithms that need all paddings.
|
||||
inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size,
|
||||
int filter_size, int out_size,
|
||||
int* offset) {
|
||||
int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
|
||||
int total_padding =
|
||||
((out_size - 1) * stride + effective_filter_size - in_size);
|
||||
total_padding = total_padding > 0 ? total_padding : 0;
|
||||
*offset = total_padding % 2;
|
||||
return total_padding / 2;
|
||||
}
|
||||
|
||||
// Matching GetWindowedOutputSize in TensorFlow.
|
||||
inline int ComputeOutSize(TfLitePadding padding, int image_size,
|
||||
int filter_size, int stride, int dilation_rate = 1) {
|
||||
int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
|
||||
switch (padding) {
|
||||
case kTfLitePaddingSame:
|
||||
return (image_size + stride - 1) / stride;
|
||||
case kTfLitePaddingValid:
|
||||
return (image_size + stride - effective_filter_size) / stride;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline TfLitePaddingValues ComputePaddingHeightWidth(
|
||||
int stride_height, int stride_width, int dilation_rate_height,
|
||||
int dilation_rate_width, int in_height, int in_width, int filter_height,
|
||||
int filter_width, TfLitePadding padding, int* out_height, int* out_width) {
|
||||
*out_width = ComputeOutSize(padding, in_width, filter_width, stride_width,
|
||||
dilation_rate_width);
|
||||
*out_height = ComputeOutSize(padding, in_height, filter_height, stride_height,
|
||||
dilation_rate_height);
|
||||
|
||||
TfLitePaddingValues padding_values;
|
||||
int offset = 0;
|
||||
padding_values.height =
|
||||
ComputePaddingWithOffset(stride_height, dilation_rate_height, in_height,
|
||||
filter_height, *out_height, &offset);
|
||||
padding_values.height_offset = offset;
|
||||
padding_values.width =
|
||||
ComputePaddingWithOffset(stride_width, dilation_rate_width, in_width,
|
||||
filter_width, *out_width, &offset);
|
||||
padding_values.width_offset = offset;
|
||||
return padding_values;
|
||||
}
|
||||
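// Worked example (illustrative, not part of the upstream header): for SAME
// padding with in_width = 28, filter_width = 3, stride_width = 1 and
// dilation_rate_width = 1:
//   effective_filter_size = (3 - 1) * 1 + 1 = 3
//   *out_width            = (28 + 1 - 1) / 1 = 28
//   total_padding         = (28 - 1) * 1 + 3 - 28 = 2
//   padding_values.width  = 2 / 2 = 1, width_offset = 2 % 2 = 0
// When the total padding is odd (e.g. filter_width = 4 gives total_padding =
// 3), the extra pixel ends up in the offset: width = 1, width_offset = 1.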
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_PADDING_H_
|
||||
32
code/lib/tfmicro/tensorflow/lite/micro/compatibility.h
Normal file
@@ -0,0 +1,32 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
|
||||
#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
|
||||
|
||||
// C++ will automatically create class-specific delete operators for virtual
|
||||
// objects, which by default call the global delete function. For embedded
|
||||
// applications we want to avoid this, and won't be calling new/delete on these
|
||||
// objects, so we need to override the default implementation with one that does
|
||||
// nothing to avoid linking in ::delete().
|
||||
// This macro needs to be included in all subclasses of a virtual base class in
|
||||
// the private section.
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
|
||||
void operator delete(void* p) {}
|
||||
#else
|
||||
#define TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
#endif
|
||||
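// Illustrative usage (not part of the upstream header; the class and base
// names are hypothetical):
//
//   class MyKernelHelper : public SomeVirtualBaseClass {
//    public:
//     MyKernelHelper() {}
//
//    private:
//     TF_LITE_REMOVE_VIRTUAL_DELETE
//   };
//
// With TF_LITE_STATIC_MEMORY defined, deleting a MyKernelHelper resolves to
// the empty class-specific operator delete instead of pulling in the global
// ::operator delete.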
|
||||
#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
|
||||
41
code/lib/tfmicro/tensorflow/lite/micro/debug_log.cc
Normal file
@@ -0,0 +1,41 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Reference implementation of the DebugLog() function that's required for a
|
||||
// platform to support the TensorFlow Lite for Microcontrollers library. This is
|
||||
// the only function that's absolutely required to be available on a target
|
||||
// device, since it's used for communicating test results back to the host so
|
||||
// that we can verify the implementation is working correctly.
|
||||
// It's designed to be as easy as possible to supply an implementation though.
|
||||
// On platforms that have a POSIX stack or C library, it can be written as a
|
||||
// single call to `fprintf(stderr, "%s", s)` to output a string to the error
|
||||
// stream of the console, but if there's no OS or C library available, there's
|
||||
// almost always an equivalent way to write out a string to some serial
|
||||
// interface that can be used instead. For example on Arm M-series MCUs, calling
|
||||
// the `bkpt #0xAB` assembler instruction will output the string in r1 to
|
||||
// whatever debug serial connection is available. If you're running mbed, you
|
||||
// can do the same by creating `Serial pc(USBTX, USBRX)` and then calling
|
||||
// `pc.printf("%s", s)`.
|
||||
// To add an equivalent function for your own platform, create your own
|
||||
// implementation file, and place it in a subfolder named after the OS
|
||||
// you're targeting. For example, see the Cortex M bare metal version in
|
||||
// tensorflow/lite/micro/bluepill/debug_log.cc or the mbed one in
|
||||
// tensorflow/lite/micro/mbed/debug_log.cc.
|
||||
|
||||
#include "tensorflow/lite/micro/debug_log.h"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
extern "C" void DebugLog(const char* s) { fprintf(stderr, "%s", s); }
|
||||
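// A minimal bare-metal sketch for reference (illustrative only; uart_putc() is
// a hypothetical board-support routine, not a TensorFlow API):
//
//   extern "C" void DebugLog(const char* s) {
//     while (*s) {
//       uart_putc(*s++);  // push each character to the debug UART
//     }
//   }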
23
code/lib/tfmicro/tensorflow/lite/micro/debug_log.h
Normal file
@@ -0,0 +1,23 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
||||
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
||||
|
||||
// This function should be implemented by each target platform, and provide a
|
||||
// way for strings to be output to some text stream. For more information, see
|
||||
// tensorflow/lite/micro/debug_log.cc.
|
||||
extern "C" void DebugLog(const char* s);
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
||||
@@ -0,0 +1,55 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Returns the floating point value for a fused activation:
|
||||
inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
|
||||
switch (act) {
|
||||
case kTfLiteActNone:
|
||||
return a;
|
||||
case kTfLiteActRelu:
|
||||
return std::max(0.0f, a);
|
||||
case kTfLiteActRelu1:
|
||||
return std::max(-1.0f, std::min(a, 1.0f));
|
||||
case kTfLiteActRelu6:
|
||||
return std::max(0.0f, std::min(a, 6.0f));
|
||||
case kTfLiteActTanh:
|
||||
return std::tanh(a);
|
||||
case kTfLiteActSignBit:
|
||||
return std::signbit(a);
|
||||
case kTfLiteActSigmoid:
|
||||
return 1.0f / (1.0f + std::exp(-a));
|
||||
}
|
||||
return 0.0f; // To indicate an unsupported activation (i.e. when a new fused
|
||||
// activation is added to the enum and not handled here).
|
||||
}
|
||||
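// Illustrative values (not part of the upstream header):
//   ActivationValFloat(kTfLiteActNone, -2.5f)  == -2.5f
//   ActivationValFloat(kTfLiteActRelu, -2.5f)  ==  0.0f
//   ActivationValFloat(kTfLiteActRelu6, 7.5f)  ==  6.0f
//   ActivationValFloat(kTfLiteActRelu1, -3.0f) == -1.0f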
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
|
||||
186
code/lib/tfmicro/tensorflow/lite/micro/kernels/activations.cc
Normal file
@@ -0,0 +1,186 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename Q>
|
||||
inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape,
|
||||
const Q* input_data, const RuntimeShape& output_shape,
|
||||
Q* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const Q val = input_data[i];
|
||||
const Q clamped = val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
}
|
||||
}
|
||||
|
||||
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const float val = input_data[i];
|
||||
const float lower = 0.0f;
|
||||
const float clamped = val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
}
|
||||
}
|
||||
|
||||
inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const float val = input_data[i];
|
||||
const float upper = 6.0f;
|
||||
const float lower = 0.0f;
|
||||
const float clamped = val > upper ? upper : val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Q>
|
||||
inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
|
||||
const Q* input_data,
|
||||
const RuntimeShape& output_shape, Q* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const Q val = input_data[i];
|
||||
const Q clamped = val > upper ? upper : val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
}
|
||||
}
|
||||
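// Worked example (illustrative; assumes the usual asymmetric quantization
// q = zero_point + round(value / scale)): with input->params.scale = 0.05f and
// input->params.zero_point = -128, the Relu6Eval() path below computes
// zero = -128 and six = -128 + round(6.0f / 0.05f) = -8, so Relu6Quantized()
// clamps the int8 data into [-128, -8].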
|
||||
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
ReluQuantized<int8_t>(input->params.zero_point, GetTensorShape(input),
|
||||
GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output),
|
||||
GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
ReluQuantized<uint8_t>(input->params.zero_point, GetTensorShape(input),
|
||||
GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output),
|
||||
GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default: {
|
||||
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
const int8_t six = FloatToAsymmetricQuantizedInt8(
|
||||
6.0f, input->params.scale, input->params.zero_point);
|
||||
const int8_t zero = input->params.zero_point;
|
||||
Relu6Quantized<int8_t>(
|
||||
zero, six, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
const uint8_t six = FloatToAsymmetricQuantizedUInt8(
|
||||
6.0f, input->params.scale, input->params.zero_point);
|
||||
const uint8_t zero = input->params.zero_point;
|
||||
Relu6Quantized<uint8_t>(
|
||||
zero, six, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default: {
|
||||
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_RELU() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::ReluPrepare,
|
||||
/*invoke=*/activations::ReluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_RELU6() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::Relu6Prepare,
|
||||
/*invoke=*/activations::Relu6Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
204
code/lib/tfmicro/tensorflow/lite/micro/kernels/add.cc
Normal file
@@ -0,0 +1,204 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/add.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace add {
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
bool requires_broadcast;
|
||||
|
||||
// These fields are used in both the general 8-bit -> 8-bit quantized path,
|
||||
// and the special 16-bit -> 16-bit quantized path.
|
||||
int input1_shift;
|
||||
int input2_shift;
|
||||
int32 output_activation_min;
|
||||
int32 output_activation_max;
|
||||
|
||||
// These fields are used only in the general 8-bit -> 8-bit quantized path.
|
||||
int32 input1_multiplier;
|
||||
int32 input2_multiplier;
|
||||
int32 output_multiplier;
|
||||
int output_shift;
|
||||
int left_shift;
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output,
|
||||
OpData* data) {
|
||||
data->requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
// 8bit -> 8bit general quantized path, with general rescalings
|
||||
data->input1_offset = -input1->params.zero_point;
|
||||
data->input2_offset = -input2->params.zero_point;
|
||||
data->output_offset = output->params.zero_point;
|
||||
data->left_shift = 20;
|
||||
const double twice_max_input_scale =
|
||||
2 * static_cast<double>(
|
||||
std::max(input1->params.scale, input2->params.scale));
|
||||
const double real_input1_multiplier =
|
||||
static_cast<double>(input1->params.scale) / twice_max_input_scale;
|
||||
const double real_input2_multiplier =
|
||||
static_cast<double>(input2->params.scale) / twice_max_input_scale;
|
||||
const double real_output_multiplier =
|
||||
twice_max_input_scale /
|
||||
((1 << data->left_shift) * static_cast<double>(output->params.scale));
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
|
||||
|
||||
QuantizeMultiplierSmallerThanOneExp(
|
||||
real_output_multiplier, &data->output_multiplier, &data->output_shift);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
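// Worked example (illustrative, not from the original source): with
// input1->params.scale = input2->params.scale = 0.5 and
// output->params.scale = 1.0,
//   twice_max_input_scale  = 2 * 0.5 = 1.0
//   real_input1_multiplier = real_input2_multiplier = 0.5 / 1.0 = 0.5
//   real_output_multiplier = 1.0 / ((1 << 20) * 1.0) ~= 9.5e-7
// QuantizeMultiplierSmallerThanOneExp() then folds each real multiplier into a
// 32-bit fixed-point multiplier plus a shift exponent.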
|
||||
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
|
||||
const OpData* data, const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
#define TF_LITE_ADD(opname) \
|
||||
reference_ops::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<float>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<float>(input2), GetTensorShape(output), \
|
||||
GetTensorData<float>(output))
|
||||
if (data->requires_broadcast) {
|
||||
TF_LITE_ADD(BroadcastAdd4DSlow);
|
||||
} else {
|
||||
TF_LITE_ADD(Add);
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteAddParams* params, const OpData* data,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
op_params.input1_offset = data->input1_offset;
|
||||
op_params.input1_multiplier = data->input1_multiplier;
|
||||
op_params.input1_shift = data->input1_shift;
|
||||
op_params.input2_offset = data->input2_offset;
|
||||
op_params.input2_multiplier = data->input2_multiplier;
|
||||
op_params.input2_shift = data->input2_shift;
|
||||
op_params.output_offset = data->output_offset;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_ADD(type, opname, dtype) \
|
||||
type::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<dtype>(output));
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
|
||||
} else {
|
||||
TF_LITE_ADD(reference_integer_ops, Add, int8_t);
|
||||
}
|
||||
} else {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
|
||||
} else {
|
||||
TF_LITE_ADD(reference_ops, Add, uint8_t);
|
||||
}
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData data;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, &data));
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
EvalAdd(context, node, params, &data, input1, input2, output);
|
||||
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, &data,
|
||||
input1, input2, output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(output->type), output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace add
|
||||
|
||||
TfLiteRegistration* Register_ADD() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/add::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -0,0 +1,83 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/micro_ops.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Register each supported op with:
|
||||
// AddBuiltin(<operator ID>, <registration>, [min version], [max version])
|
||||
AllOpsResolver::AllOpsResolver() {
|
||||
AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(), 1, 4);
|
||||
AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(), 1,
|
||||
3);
|
||||
AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_ABS, Register_ABS());
|
||||
AddBuiltin(BuiltinOperator_SIN, Register_SIN());
|
||||
AddBuiltin(BuiltinOperator_COS, Register_COS());
|
||||
AddBuiltin(BuiltinOperator_LOG, Register_LOG());
|
||||
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
|
||||
AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
|
||||
AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
|
||||
AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
|
||||
AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
|
||||
AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
|
||||
AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
|
||||
AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
|
||||
AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
|
||||
AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
|
||||
AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LESS, Register_LESS(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_CEIL, Register_CEIL());
|
||||
AddBuiltin(BuiltinOperator_ROUND, Register_ROUND());
|
||||
AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
|
||||
AddBuiltin(BuiltinOperator_PACK, Register_PACK(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_PAD, Register_PAD(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_NEG, Register_NEG());
|
||||
AddBuiltin(BuiltinOperator_ADD, Register_ADD(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_MUL, Register_MUL(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_SUB, Register_SUB(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE());
|
||||
AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_RELU, Register_RELU());
|
||||
AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
|
||||
AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
|
||||
AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
|
||||
Register_RESIZE_NEAREST_NEIGHBOR(),
|
||||
/* min_version = */ 1,
|
||||
/* max_version = */ 2);
|
||||
AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
|
||||
}
|
||||
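// Illustrative usage (a sketch based on this library version's
// MicroInterpreter API; the model pointer, arena size and error reporter are
// placeholders):
//
//   static tflite::ops::micro::AllOpsResolver resolver;
//   static uint8_t tensor_arena[16 * 1024];
//   tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
//                                        sizeof(tensor_arena),
//                                        &error_reporter);
//   interpreter.AllocateTensors();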
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -0,0 +1,34 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
class AllOpsResolver : public MicroMutableOpResolver {
|
||||
public:
|
||||
AllOpsResolver();
|
||||
|
||||
private:
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
127
code/lib/tfmicro/tensorflow/lite/micro/kernels/arg_min_max.cc
Normal file
@@ -0,0 +1,127 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace arg_min_max {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kAxis = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
|
||||
const T1* input1_data, const T3* input2_data,
|
||||
const RuntimeShape& output_shape, T2* output_data,
|
||||
bool is_arg_max) {
|
||||
if (is_arg_max) {
|
||||
reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
|
||||
output_shape, output_data, micro::Greater());
|
||||
} else {
|
||||
reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
|
||||
output_shape, output_data, micro::Less());
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* axis = GetInput(context, node, kAxis);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
|
||||
ArgMinMaxHelper(GetTensorShape(input), GetTensorData<data_type>(input), \
|
||||
GetTensorData<axis_type>(axis), GetTensorShape(output), \
|
||||
GetTensorData<output_type>(output), is_arg_max)
|
||||
if (axis->type == kTfLiteInt32) {
|
||||
if (output->type == kTfLiteInt32) {
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_ARG_MIN_MAX(float, int32_t, int32_t);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only float32, uint8 and int8 are "
|
||||
"supported currently, got %s.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
|
||||
TfLiteTypeGetName(axis->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
#undef TF_LITE_ARG_MIN_MAX
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus ArgMinEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return Eval(context, node, false);
|
||||
}
|
||||
|
||||
TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return Eval(context, node, true);
|
||||
}
|
||||
|
||||
} // namespace arg_min_max
|
||||
|
||||
TfLiteRegistration* Register_ARG_MAX() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_ARG_MIN() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
70
code/lib/tfmicro/tensorflow/lite/micro/kernels/ceil.cc
Normal file
@@ -0,0 +1,70 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/ceil.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace ceil {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
|
||||
for (int i = 0; i < output->dims->size; ++i) {
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
reference_ops::Ceil(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace ceil
|
||||
|
||||
TfLiteRegistration* Register_CEIL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/ceil::Prepare,
|
||||
/*invoke=*/ceil::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -0,0 +1,175 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
|
||||
/*
|
||||
* The circular buffer custom operator is used to implement strided streaming
|
||||
* convolutions on TFLite Micro. Each time this operator is invoked, it checks
|
||||
* whether or not to run, based on a predetermined stride in time. If the op
|
||||
* runs, it inserts the input into the end of the output buffer and shifts the
|
||||
* output values towards the start of the buffer. It discards the oldest value
|
||||
* in the output buffer.
|
||||
*
|
||||
* Input: [<input N+1>]
|
||||
* Before shifting:
|
||||
* Output: [<input 1>, <input 2>, <input ...>, <input N>]
|
||||
*
|
||||
* After shifting:
|
||||
* Output: [<input 2>, <input 3>, <input ...>, <input N+1>]
|
||||
*
|
||||
* We make some assumptions in this custom operator:
|
||||
* - Input shape must be [1, 1, 1, depth]
|
||||
* - Output shape must be [1, num_slots, 1, depth]
|
||||
* - Input and output types must match.
|
||||
* - Input and output quantization params must be identical.
|
||||
*/
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace circular_buffer {
|
||||
|
||||
namespace {
|
||||
|
||||
// The CircularBuffer op has one input and one output tensor.
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// TODO(b/149795762): Add this to TfLiteStatus enum.
|
||||
constexpr int kTfLiteAbort = -9;
|
||||
|
||||
// These fields control the stride period of a strided streaming model. This op
|
||||
// returns kTfLiteAbort until cycles_until_run-- is zero. At this time,
|
||||
// cycles_until_run is reset to cycles_max.
|
||||
struct OpData {
|
||||
int cycles_until_run;
|
||||
int cycles_max;
|
||||
};
|
||||
|
||||
// These constants are specific to the music detect model.
|
||||
// They exist until (b/132070898) is fixed.
|
||||
constexpr int kMaxOpDataSize = 7;
|
||||
int op_data_counter = 0;
|
||||
OpData op_data_array[kMaxOpDataSize];
|
||||
|
||||
} // namespace
|
||||
|
||||
void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, 1, output->dims->data[0]);
|
||||
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[0]);
|
||||
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
|
||||
TF_LITE_ENSURE_EQ(context, 1, output->dims->data[2]);
|
||||
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
|
||||
// The circular buffer custom operator currently only supports int8.
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
|
||||
|
||||
// TODO(b/132070898): Use statically slotted OpData structures until a
|
||||
// scratch memory API is ready.
|
||||
TFLITE_DCHECK_LE(op_data_counter, kMaxOpDataSize);
|
||||
OpData* op_data = &op_data_array[op_data_counter++];
|
||||
// The last circular buffer layer (length 5) simply accumulates outputs, and
|
||||
// does not run periodically.
|
||||
// TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
|
||||
if (output->dims->data[1] == 5) {
|
||||
op_data->cycles_max = 1;
|
||||
} else {
|
||||
op_data->cycles_max = 2;
|
||||
}
|
||||
op_data->cycles_until_run = op_data->cycles_max;
|
||||
node->user_data = op_data;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Shifts the buffer over by the output depth, and writes the new input to the
// end of the buffer.
|
||||
// num_slots is the number of samples stored in the output buffer.
|
||||
// depth is the size of each sample.
|
||||
void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
|
||||
memmove(output, &output[depth], (num_slots - 1) * depth);
|
||||
memcpy(&output[(num_slots - 1) * depth], input, depth);
|
||||
}
|
||||
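// Worked example (illustrative): with num_slots = 3 and depth = 2, an output
// buffer holding [a0 a1 | b0 b1 | c0 c1] and a new input [d0 d1] becomes
// [b0 b1 | c0 c1 | d0 d1] -- the oldest slot is discarded and the new sample
// is appended at the end.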
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData* data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
int num_slots = output->dims->data[1];
|
||||
int depth = output->dims->data[3];
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
EvalInt8(GetTensorData<int8_t>(input), num_slots, depth,
|
||||
GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
if (--data->cycles_until_run != 0) {
|
||||
// Signal the interpreter to end current run if the delay before op invoke
|
||||
// has not been reached.
|
||||
// TODO(b/149795762): Add kTfLiteAbort to TfLiteStatus enum.
|
||||
return static_cast<TfLiteStatus>(kTfLiteAbort);
|
||||
}
|
||||
|
||||
// If prepare is ever called more than one time (for example, when testing the
|
||||
// ambient model, the interpreter is created a few times), this op data
|
||||
// counter needs to be reset so that future instances do not overrun this op
|
||||
// data array.
|
||||
op_data_counter = 0;
|
||||
|
||||
data->cycles_until_run = data->cycles_max;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace circular_buffer
|
||||
|
||||
TfLiteRegistration* Register_CIRCULAR_BUFFER() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/circular_buffer::Free,
|
||||
/*prepare=*/circular_buffer::Prepare,
|
||||
/*invoke=*/circular_buffer::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
370
code/lib/tfmicro/tensorflow/lite/micro/kernels/comparisons.cc
Normal file
@@ -0,0 +1,370 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace comparisons {
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// TODO(ruic): optimize macros below to use template functions.
|
||||
#define TF_LITE_QUANTIZE_COMPARISON(opname) \
|
||||
template <typename input_dtype> \
|
||||
void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \
|
||||
const TfLiteTensor* input1, \
|
||||
const TfLiteTensor* input2, TfLiteTensor* output, \
|
||||
bool requires_broadcast) { \
|
||||
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \
|
||||
auto input1_offset = -input1->params.zero_point; \
|
||||
auto input2_offset = -input2->params.zero_point; \
|
||||
const int left_shift = 8; \
|
||||
\
|
||||
int32 input1_multiplier; \
|
||||
int input1_shift; \
|
||||
QuantizeMultiplierSmallerThanOneExp( \
|
||||
static_cast<double>(input1->params.scale), &input1_multiplier, \
|
||||
&input1_shift); \
|
||||
int32 input2_multiplier; \
|
||||
int input2_shift; \
|
||||
QuantizeMultiplierSmallerThanOneExp( \
|
||||
static_cast<double>(input2->params.scale), &input2_multiplier, \
|
||||
&input2_shift); \
|
||||
\
|
||||
ComparisonParams op_params; \
|
||||
op_params.left_shift = left_shift; \
|
||||
op_params.input1_offset = input1_offset; \
|
||||
op_params.input1_multiplier = input1_multiplier; \
|
||||
op_params.input1_shift = input1_shift; \
|
||||
op_params.input2_offset = input2_offset; \
|
||||
op_params.input2_multiplier = input2_multiplier; \
|
||||
op_params.input2_shift = input2_shift; \
|
||||
if (requires_broadcast) { \
|
||||
reference_ops::Broadcast4DSlow##opname##WithScaling( \
|
||||
op_params, GetTensorShape(input1), \
|
||||
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<bool>(output)); \
|
||||
} else { \
|
||||
reference_ops::opname##WithScaling( \
|
||||
op_params, GetTensorShape(input1), \
|
||||
GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<input_dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<bool>(output)); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
TF_LITE_QUANTIZE_COMPARISON(Equal);
|
||||
TF_LITE_QUANTIZE_COMPARISON(NotEqual);
|
||||
TF_LITE_QUANTIZE_COMPARISON(Greater);
|
||||
TF_LITE_QUANTIZE_COMPARISON(GreaterEqual);
|
||||
TF_LITE_QUANTIZE_COMPARISON(Less);
|
||||
TF_LITE_QUANTIZE_COMPARISON(LessEqual);
|
||||
#undef TF_LITE_QUANTIZE_COMPARISON
|
||||
|
||||
#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \
|
||||
{ \
|
||||
ComparisonParams op_params; \
|
||||
requires_broadcast \
|
||||
? reference_ops::Broadcast4DSlow##opname##NoScaling( \
|
||||
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<type>(input2), \
|
||||
GetTensorShape(output), GetTensorData<bool>(output)) \
|
||||
: reference_ops::opname##NoScaling( \
|
||||
op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
|
||||
GetTensorShape(input2), GetTensorData<type>(input2), \
|
||||
GetTensorShape(output), GetTensorData<bool>(output)); \
|
||||
}
|
||||
|
||||
TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteBool:
|
||||
TF_LITE_COMPARISON(bool, Equal, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, Equal, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// TODO(renjieliu): Refactor the logic to avoid duplications.
|
||||
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteBool:
|
||||
TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, NotEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, Greater, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, Less, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, Less, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
bool requires_broadcast = !HaveSameShapes(input1, input2);
|
||||
switch (input1->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_COMPARISON(float, LessEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
|
||||
requires_broadcast);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input1->type), input1->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace comparisons
|
||||
|
||||
TfLiteRegistration* Register_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::EqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_NOT_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::NotEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_GREATER() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::GreaterEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_GREATER_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::GreaterEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LESS() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::LessEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LESS_EQUAL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/comparisons::LessEqualEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
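The quantized path above never leaves integer arithmetic on device; conceptually, though, EvalQuantized##opname compares the real values scale * (q - zero_point) of its two operands, rescaled by per-input fixed-point multipliers and a shared left shift. A minimal standalone sketch of that real-value view (plain C++, illustrative only; the function below is not part of the library):

#include <cstdint>
#include <cstdio>

// Illustrative only: compare two quantized values, each with its own
// scale/zero_point, by comparing the real values they represent. The kernel
// computes the same ordering with integer multipliers instead of floats.
bool QuantizedLess(uint8_t q1, float scale1, int32_t zp1,
                   uint8_t q2, float scale2, int32_t zp2) {
  const float real1 = scale1 * static_cast<float>(q1 - zp1);
  const float real2 = scale2 * static_cast<float>(q2 - zp2);
  return real1 < real2;
}

int main() {
  // Tensors quantized with different parameters can still be compared.
  printf("%d\n", QuantizedLess(/*q1=*/130, /*scale1=*/0.05f, /*zp1=*/128,
                               /*q2=*/140, /*scale2=*/0.02f, /*zp2=*/100));
  return 0;  // prints 1, since 0.1 < 0.8
}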
231
code/lib/tfmicro/tensorflow/lite/micro/kernels/concatenation.cc
Normal file
231
code/lib/tfmicro/tensorflow/lite/micro/kernels/concatenation.cc
Normal file
@@ -0,0 +1,231 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/concatenation.h"

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace concatenation {

constexpr int kMaxInputNum = 10;  // Maximum number of input tensors
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // This function only checks the types. Additional shape validations are
  // performed in the reference implementation called during Eval().
  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  TfLiteType input_type = GetInput(context, node, 0)->type;
  TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;

  // Check activation and input type
  TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
  TF_LITE_ENSURE(context,
                 input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
                     input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
                     input_type == kTfLiteInt64);

  // Output type must match input type
  TF_LITE_ENSURE_EQ(context, output_type, input_type);

  // This implementation does not support a large number of input tensors
  const int num_inputs = NumInputs(node);
  TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum);

  // Shapes with dimensions >4 are not yet supported with static allocation.
  for (int i = 0; i < num_inputs; ++i) {
    const TfLiteTensor* input = GetInput(context, node, i);
    int num_dimensions = NumDimensions(input);

    if (num_dimensions > 4) {
      TF_LITE_KERNEL_LOG(
          context,
          "Op Concatenation does not currently support num dimensions >4 "
          "Tensor '%s' has %d dimensions.",
          input->name, num_dimensions);
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}

// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
  if (axis >= 0) {
    return axis;
  } else {
    return NumDimensions(output_tensor) + axis;
  }
}

// The following functions are helpers to get tensor data in the format that
// the reference op implementation expects. They provide the same functionality
// as class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.

// Gets shapes from a list of tensors.
inline void GetAllTensorShapes(const TfLiteContext& context,
                               const TfLiteIntArray& tensor_list,
                               RuntimeShape all_shapes[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    RuntimeShape shape = GetTensorShape(t);
    all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
  }
}

// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
                              const RuntimeShape* pointers[]) {
  for (size_t i = 0; i < num; ++i) {
    pointers[i] = &shapes[i];
  }
}

// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllTensorData(const TfLiteContext& context,
                             const TfLiteIntArray& tensor_list,
                             T* all_data[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    all_data[i] = GetTensorData<T>(t);
  }
}

// Gets scale and zero point from a list of tensors
inline void GetAllQuantizationParam(const TfLiteContext& context,
                                    const TfLiteIntArray& tensor_list,
                                    float scales[kMaxInputNum],
                                    int32 zero_points[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    scales[i] = t->params.scale;
    zero_points[i] = t->params.zero_point;
  }
}

template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointers of the input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const data_type* inputs_data[kMaxInputNum];
  GetAllTensorShapes(*context, *node->inputs, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllTensorData(*context, *node->inputs, inputs_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  ConcatenationParams op_params;
  op_params.axis = CalculatePositiveAxis(params->axis, output);
  op_params.inputs_count = NumInputs(node);

  reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data,
                               GetTensorShape(output),
                               GetTensorData<data_type>(output));
}

void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointers of the input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const uint8_t* inputs_data[kMaxInputNum];
  float inputs_scale[kMaxInputNum];
  int32 inputs_zero_point[kMaxInputNum];
  GetAllTensorShapes(*context, *node->inputs, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllTensorData(*context, *node->inputs, inputs_data);
  GetAllQuantizationParam(*context, *node->inputs, inputs_scale,
                          inputs_zero_point);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  ConcatenationParams op_params;
  op_params.axis = CalculatePositiveAxis(params->axis, output);
  op_params.inputs_count = NumInputs(node);
  op_params.input_zeropoint = inputs_zero_point;
  op_params.input_scale = inputs_scale;
  op_params.output_zeropoint = output->params.zero_point;
  op_params.output_scale = output->params.scale;

  reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr,
                                          inputs_data, GetTensorShape(output),
                                          GetTensorData<uint8>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;

  switch (output_type) {  // Already know in/out types are the same.
    case kTfLiteFloat32:
      EvalUnquantized<float>(context, node);
      break;
    case kTfLiteInt32:
      EvalUnquantized<int32_t>(context, node);
      break;
    case kTfLiteUInt8:
      EvalQuantizedUInt8(context, node);
      break;
    case kTfLiteInt8:
      EvalUnquantized<int8_t>(context, node);
      break;
    case kTfLiteInt64:
      EvalUnquantized<int64_t>(context, node);
      break;

    default:
      TF_LITE_KERNEL_LOG(
          context, "Op Concatenation does not currently support Type '%s'.",
          TfLiteTypeGetName(output_type));
      return kTfLiteError;
  }

  return kTfLiteOk;
}

}  // namespace concatenation

TfLiteRegistration* Register_CONCATENATION() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/concatenation::Prepare,
                                 /*invoke=*/concatenation::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
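Because TFLite Micro avoids dynamic allocation, the Eval paths above stage shapes, data pointers and quantization parameters in fixed-size arrays bounded by kMaxInputNum. For the simplest case, the copy pattern the reference concatenation ultimately performs reduces to the standalone sketch below (hypothetical helper, axis 0 only, two inputs):

#include <cstddef>
#include <cstdio>

// Concatenate two flat buffers of shapes [n1, inner] and [n2, inner] along
// axis 0; the real kernel generalizes this to up to kMaxInputNum inputs and
// an arbitrary (positive) axis.
void ConcatAxis0(const float* in1, size_t n1, const float* in2, size_t n2,
                 size_t inner, float* out) {
  for (size_t i = 0; i < n1 * inner; ++i) out[i] = in1[i];
  for (size_t i = 0; i < n2 * inner; ++i) out[n1 * inner + i] = in2[i];
}

int main() {
  const float a[4] = {1, 2, 3, 4};  // shape [2, 2]
  const float b[2] = {5, 6};        // shape [1, 2]
  float out[6];                     // shape [3, 2]
  ConcatAxis0(a, 2, b, 1, /*inner=*/2, out);
  for (float v : out) printf("%g ", v);
  printf("\n");  // 1 2 3 4 5 6
  return 0;
}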
279
code/lib/tfmicro/tensorflow/lite/micro/kernels/conv copy.cc
Normal file
279
code/lib/tfmicro/tensorflow/lite/micro/kernels/conv copy.cc
Normal file
@@ -0,0 +1,279 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/conv.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace micro {
namespace conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted by jomjol 05.06.20
// constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 4096;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;

// This file has 2 implementations of Conv.

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};

inline PaddingType RuntimePaddingType(TfLitePadding padding) {
  switch (padding) {
    case TfLitePadding::kTfLitePaddingSame:
      return PaddingType::kSame;
    case TfLitePadding::kTfLitePaddingValid:
      return PaddingType::kValid;
    case TfLitePadding::kTfLitePaddingUnknown:
    default:
      return PaddingType::kNone;
  }
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, int width, int height,
                             int filter_width, int filter_height, int out_width,
                             int out_height, const TfLiteType data_type,
                             OpData* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift),
        output_channels));
  }
  return kTfLiteOk;
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* im2col,
                   TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<uint8_t>(input), GetTensorShape(filter),
                      GetTensorData<uint8_t>(filter), GetTensorShape(bias),
                      GetTensorData<int32_t>(bias), GetTensorShape(output),
                      GetTensorData<uint8_t>(output), GetTensorShape(im2col),
                      GetTensorData<uint8_t>(im2col), nullptr);
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output,
                             TfLiteTensor* im2col) {
  ConvParams op_params;
  op_params.input_offset = -input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  reference_integer_ops::ConvPerChannel(
      op_params, data->per_channel_output_multiplier,
      data->per_channel_output_shift, GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* im2col,
               TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<float>(input), GetTensorShape(filter),
                      GetTensorData<float>(filter), GetTensorShape(bias),
                      GetTensorData<float>(bias), GetTensorShape(output),
                      GetTensorData<float>(output), GetTensorShape(im2col),
                      GetTensorData<float>(im2col));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);

  int input_width = input->dims->data[2];
  int input_height = input->dims->data[1];
  int filter_width = filter->dims->data[2];
  int filter_height = filter->dims->data[1];
  int output_width = output->dims->data[2];
  int output_height = output->dims->data[1];

  OpData data;

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);

    TF_LITE_ENSURE(context,
                   affine_quantization->scale->size == 1 ||
                       affine_quantization->scale->size ==
                           filter->dims->data[kConvQuantizedDimension]);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, &data));

  switch (input->type) {  // Already know in/out types are the same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input, filter, bias, nullptr,
                nullptr, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
                              output, nullptr);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, &data, input, filter, bias, nullptr,
                    nullptr, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace conv

TfLiteRegistration* Register_CONV_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/conv::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
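PopulateConvolutionQuantizationParams above folds the real rescale factor (input_scale * filter_scale / output_scale) into an integer multiplier plus a shift so the kernel can rescale accumulators without floating point. A rough standalone sketch of that decomposition for a factor below 1 (an assumption-level illustration based on frexp; the library's QuantizeMultiplier helpers handle additional edge cases):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose 0 < real < 1 into real ~= q * 2^shift / 2^31, with q a Q31
// fixed-point value and shift <= 0 (i.e. a right shift at runtime).
void DecomposeMultiplier(double real, int32_t* quantized, int* shift) {
  const double significand = std::frexp(real, shift);  // real = s * 2^shift
  int64_t q = static_cast<int64_t>(std::round(significand * (1ll << 31)));
  if (q == (1ll << 31)) {  // rounding pushed the significand up to 1.0
    q /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q);
}

int main() {
  int32_t q;
  int shift;
  DecomposeMultiplier(0.0037, &q, &shift);
  printf("0.0037 ~= %d * 2^%d / 2^31 = %.7f\n", q, shift,
         static_cast<double>(q) / (1ll << 31) * std::pow(2.0, shift));
  return 0;
}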
282
code/lib/tfmicro/tensorflow/lite/micro/kernels/conv.cc
Normal file
282
code/lib/tfmicro/tensorflow/lite/micro/kernels/conv.cc
Normal file
@@ -0,0 +1,282 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/conv.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace micro {
namespace conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted by jomjol 05.06.20
// constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 32384;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;

// This file has 2 implementations of Conv.

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};

inline PaddingType RuntimePaddingType(TfLitePadding padding) {
  switch (padding) {
    case TfLitePadding::kTfLitePaddingSame:
      return PaddingType::kSame;
    case TfLitePadding::kTfLitePaddingValid:
      return PaddingType::kValid;
    case TfLitePadding::kTfLitePaddingUnknown:
    default:
      return PaddingType::kNone;
  }
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, int width, int height,
                             int filter_width, int filter_height, int out_width,
                             int out_height, const TfLiteType data_type,
                             OpData* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift),
        output_channels));
  }
  return kTfLiteOk;
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* im2col,
                   TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<uint8_t>(input), GetTensorShape(filter),
                      GetTensorData<uint8_t>(filter), GetTensorShape(bias),
                      GetTensorData<int32_t>(bias), GetTensorShape(output),
                      GetTensorData<uint8_t>(output), GetTensorShape(im2col),
                      GetTensorData<uint8_t>(im2col), nullptr);
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output,
                             TfLiteTensor* im2col) {
  ConvParams op_params;
  op_params.input_offset = -input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  reference_integer_ops::ConvPerChannel(
      op_params, data->per_channel_output_multiplier,
      data->per_channel_output_shift, GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* im2col,
               TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<float>(input), GetTensorShape(filter),
                      GetTensorData<float>(filter), GetTensorShape(bias),
                      GetTensorData<float>(bias), GetTensorShape(output),
                      GetTensorData<float>(output), GetTensorShape(im2col),
                      GetTensorData<float>(im2col));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);

  int input_width = input->dims->data[2];
  int input_height = input->dims->data[1];
  int filter_width = filter->dims->data[2];
  int filter_height = filter->dims->data[1];
  int output_width = output->dims->data[2];
  int output_height = output->dims->data[1];

  struct tflite::ops::micro::conv::OpData* data =
      (struct tflite::ops::micro::conv::OpData*)malloc(
          sizeof(struct tflite::ops::micro::conv::OpData));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);

    TF_LITE_ENSURE(context,
                   affine_quantization->scale->size == 1 ||
                       affine_quantization->scale->size ==
                           filter->dims->data[kConvQuantizedDimension]);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, data));

  switch (input->type) {  // Already know in/out types are the same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, data, input, filter, bias, nullptr,
                nullptr, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                              output, nullptr);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, data, input, filter, bias, nullptr,
                    nullptr, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      free(data);
      return kTfLiteError;
  }
  free(data);
  return kTfLiteOk;
}

}  // namespace conv

TfLiteRegistration* Register_CONV_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/conv::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
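This file is a lightly modified copy of the kernel above: kMaxChannels is raised to 32384 and OpData is heap-allocated inside Eval() instead of being a stack local. The standalone arithmetic below shows why a stack instance is no longer viable on the ESP32 target; note also that the TF_LITE_ENSURE* early returns between malloc() and free() bypass the free() and so leak the block on error paths.

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const size_t kMaxChannels = 32384;
  // Two int32_t arrays dominate sizeof(OpData):
  //   per_channel_output_multiplier[kMaxChannels]
  //   per_channel_output_shift[kMaxChannels]
  const size_t per_channel_bytes = 2 * kMaxChannels * sizeof(int32_t);
  printf("per-channel arrays: %zu bytes (~%zu KiB)\n", per_channel_bytes,
         per_channel_bytes / 1024);
  // Roughly a quarter megabyte -- far larger than a typical task stack on the
  // ESP32 target, hence the heap allocation in Eval().
  return 0;
}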
269
code/lib/tfmicro/tensorflow/lite/micro/kernels/depthwise_conv.cc
Normal file
269
code/lib/tfmicro/tensorflow/lite/micro/kernels/depthwise_conv.cc
Normal file
@@ -0,0 +1,269 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace depthwise_conv {
|
||||
namespace {
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kFilterTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
constexpr int kMaxChannels = 1024;
|
||||
|
||||
// Depthwise conv is quantized along dimension 3:
|
||||
// https://www.tensorflow.org/lite/performance/quantization_spec
|
||||
constexpr int kDepthwiseConvQuantizedDimension = 3;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
// Per channel output multiplier and shift.
|
||||
// TODO(b/141139247): Allocate these dynamically when possible.
|
||||
int32_t per_channel_output_multiplier[kMaxChannels];
|
||||
int32_t per_channel_output_shift[kMaxChannels];
|
||||
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, int width,
|
||||
int height, int filter_width, int filter_height,
|
||||
const TfLiteType data_type, OpData* data) {
|
||||
bool has_bias = node->inputs->size == 3;
|
||||
// Check number of inputs/outputs
|
||||
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
|
||||
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
|
||||
|
||||
int unused_output_height, unused_output_width;
|
||||
data->padding = ComputePaddingHeightWidth(
|
||||
params->stride_height, params->stride_width, 1, 1, height, width,
|
||||
filter_height, filter_width, params->padding, &unused_output_height,
|
||||
&unused_output_width);
|
||||
|
||||
// Note that quantized inference requires that all tensors have their
|
||||
// parameters set. This is usually done during quantized training.
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
|
||||
const TfLiteTensor* bias =
|
||||
GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||
|
||||
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
|
||||
context, input, filter, bias, output, params->activation,
|
||||
&data->output_multiplier, &data->output_shift,
|
||||
&data->output_activation_min, &data->output_activation_max,
|
||||
data->per_channel_output_multiplier,
|
||||
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
|
||||
tflite::reference_ops::DepthwiseConv(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(filter), GetTensorData<float>(filter),
|
||||
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
DepthwiseParams op_params;
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.weights_offset = 0;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
// TODO(b/130439627): Use calculated value for clamping.
|
||||
op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
|
||||
op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
|
||||
|
||||
reference_integer_ops::DepthwiseConvPerChannel(
|
||||
op_params, data->per_channel_output_multiplier,
|
||||
data->per_channel_output_shift, GetTensorShape(input),
|
||||
GetTensorData<int8>(input), GetTensorShape(filter),
|
||||
GetTensorData<int8>(filter), GetTensorShape(bias),
|
||||
GetTensorData<int32>(bias), GetTensorShape(output),
|
||||
GetTensorData<int8>(output));
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteDepthwiseConvParams* params, OpData* data,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
|
||||
tflite::DepthwiseParams op_params;
|
||||
// Padding type is ignored, but still set.
|
||||
op_params.padding_type = PaddingType::kSame;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.dilation_width_factor = params->dilation_width_factor;
|
||||
op_params.dilation_height_factor = params->dilation_height_factor;
|
||||
op_params.depth_multiplier = params->depth_multiplier;
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
  op_params.quantized_activation_max = data->output_activation_max;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
  op_params.output_shift = -data->output_shift;

  tflite::reference_ops::DepthwiseConv(
      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
      GetTensorShape(filter), GetTensorData<uint8_t>(filter),
      GetTensorShape(bias), GetTensorData<int32_t>(bias),
      GetTensorShape(output), GetTensorData<uint8_t>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;

  const TfLiteType data_type = input->type;
  int width = SizeOfDimension(input, 2);
  int height = SizeOfDimension(input, 1);
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);

  OpData data;

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);
    TF_LITE_ENSURE(
        context, affine_quantization->scale->size == 1 ||
                     affine_quantization->scale->size ==
                         filter->dims->data[kDepthwiseConvQuantizedDimension]);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
                                        filter_width, filter_height, data_type,
                                        &data));

  // TODO(aselle): Consider whether float conv and quantized conv should be
  // separate ops to avoid dispatch overhead here.
  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input, filter, bias, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, &data, input, filter,
                              bias, output);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, &data, input, filter, bias, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace depthwise_conv

TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/depthwise_conv::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
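The quantized path above follows the sign convention flagged in the kernel comment: CalculateOpData stores a right shift, so it is negated before being handed to the reference kernel, where a positive output_shift means shift left. The following standalone sketch shows roughly what such a multiplier/shift pair does to a 32-bit accumulator; it is an illustration only, RequantizeSketch is a made-up name, and the vendored kernels use gemmlowp's saturating rounding-doubling high multiply rather than this simplified arithmetic.

#include <algorithm>
#include <cstdint>

// Illustration only: apply a Q31 fixed-point multiplier and a signed shift
// (positive means shift left) to an accumulator, then add the output offset
// and clamp to the fused activation range.
int32_t RequantizeSketch(int64_t acc, int32_t multiplier, int shift,
                         int32_t output_offset, int32_t act_min,
                         int32_t act_max) {
  if (shift > 0) {
    acc <<= shift;  // Positive shift: scale up before the Q31 multiply.
  }
  acc = (acc * multiplier + (int64_t{1} << 30)) >> 31;  // Rounded Q31 multiply.
  if (shift < 0) {
    acc = (acc + (int64_t{1} << (-shift - 1))) >> -shift;  // Rounding right shift.
  }
  acc += output_offset;
  return static_cast<int32_t>(
      std::min<int64_t>(act_max, std::max<int64_t>(act_min, acc)));
}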
135
code/lib/tfmicro/tensorflow/lite/micro/kernels/dequantize.cc
Normal file
@@ -0,0 +1,135 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/dequantize.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace dequantize {

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  // TODO(b/140515557): Add cached dequant to improve hybrid model performance.
  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);

  TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
                              input->type == kTfLiteInt8 ||
                              input->type == kTfLiteInt16);
  TF_LITE_ENSURE(
      context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);

  if (output->type == kTfLiteFloat32) {
    tflite::DequantizationParams op_params;
    op_params.zero_point = input->params.zero_point;
    op_params.scale = static_cast<double>(input->params.scale);
    switch (input->type) {
      case kTfLiteUInt8:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        break;
      case kTfLiteInt8:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        break;
      case kTfLiteInt16:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<int16_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        break;
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                           TfLiteTypeGetName(input->type),
                           TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else if (output->type == kTfLiteInt32) {
    int32_t output_multiplier;
    int output_shift;
    const double effective_output_scale =
        static_cast<double>(input->params.scale) /
        static_cast<double>(output->params.scale);
    QuantizeMultiplier(effective_output_scale, &output_multiplier,
                       &output_shift);
    int flat_size =
        MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
    switch (input->type) {
      case kTfLiteInt16: {
        reference_ops::Requantize(
            GetTensorData<int16_t>(input), flat_size, output_multiplier,
            output_shift, input->params.zero_point, output->params.zero_point,
            GetTensorData<int32_t>(output));
        break;
      }
      case kTfLiteInt8: {
        reference_ops::Requantize(
            GetTensorData<int8_t>(input), flat_size, output_multiplier,
            output_shift, input->params.zero_point, output->params.zero_point,
            GetTensorData<int32_t>(output));
        break;
      }
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                           TfLiteTypeGetName(input->type),
                           TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else {
    TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                       TfLiteTypeGetName(input->type),
                       TfLiteTypeGetName(output->type));
    return kTfLiteError;
  }

  return kTfLiteOk;
}

}  // namespace dequantize

TfLiteRegistration* Register_DEQUANTIZE() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/dequantize::Prepare,
                                 /*invoke=*/dequantize::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
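For the float path above, dequantization is simply real = scale * (q - zero_point); for the int32 path, the kernel folds input_scale / output_scale into a fixed-point multiplier with QuantizeMultiplier and lets Requantize apply it. A floating-point sketch of both mappings follows, for illustration only; the helper names are invented here and the real kernel performs the second mapping in integer arithmetic.

#include <cmath>
#include <cstdint>

// Illustration only: float meaning of a quantized value.
float DequantizeOne(int32_t q, float scale, int32_t zero_point) {
  return scale * static_cast<float>(q - zero_point);
}

// Illustration only: float equivalent of the int32 path above, re-expressing a
// value quantized with (in_scale, in_zp) using (out_scale, out_zp).
int32_t RequantizeOne(int32_t q_in, float in_scale, int32_t in_zp,
                      float out_scale, int32_t out_zp) {
  const float real = in_scale * static_cast<float>(q_in - in_zp);
  return static_cast<int32_t>(std::lround(real / out_scale)) + out_zp;
}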
226
code/lib/tfmicro/tensorflow/lite/micro/kernels/elementwise.cc
Normal file
@@ -0,0 +1,226 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace elementwise {
|
||||
namespace {
|
||||
|
||||
bool IsNumericSupportedType(const TfLiteType type) {
|
||||
return type == kTfLiteFloat32;
|
||||
}
|
||||
|
||||
bool IsLogicalSupportedType(const TfLiteType type) {
|
||||
return type == kTfLiteBool;
|
||||
}
|
||||
|
||||
typedef bool (*IsSupportedType)(TfLiteType);
|
||||
template <IsSupportedType>
|
||||
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
if (!IsSupportedType(input->type)) {
|
||||
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
T func(T), TfLiteType expected_type) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, expected_type);
|
||||
const int64_t num_elements = NumElements(input);
|
||||
const T* in_data = GetTensorData<T>(input);
|
||||
T* out_data = GetTensorData<T>(output);
|
||||
for (int64_t i = 0; i < num_elements; ++i) {
|
||||
out_data[i] = func(in_data[i]);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
inline TfLiteStatus EvalNumeric(TfLiteContext* context, TfLiteNode* node,
|
||||
float float_func(float)) {
|
||||
return EvalImpl<float>(context, node, float_func, kTfLiteFloat32);
|
||||
}
|
||||
|
||||
inline TfLiteStatus EvalLogical(TfLiteContext* context, TfLiteNode* node,
|
||||
bool bool_func(bool)) {
|
||||
return EvalImpl<bool>(context, node, bool_func, kTfLiteBool);
|
||||
}
|
||||
|
||||
TfLiteStatus AbsEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::abs);
|
||||
}
|
||||
|
||||
TfLiteStatus SinEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::sin);
|
||||
}
|
||||
|
||||
TfLiteStatus CosEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::cos);
|
||||
}
|
||||
|
||||
TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::log);
|
||||
}
|
||||
|
||||
TfLiteStatus SqrtEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, std::sqrt);
|
||||
}
|
||||
|
||||
TfLiteStatus RsqrtEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, [](float f) { return 1.f / std::sqrt(f); });
|
||||
}
|
||||
|
||||
TfLiteStatus SquareEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalNumeric(context, node, [](float f) { return f * f; });
|
||||
}
|
||||
|
||||
TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return EvalLogical(context, node, [](bool v) { return !v; });
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace elementwise
|
||||
|
||||
TfLiteRegistration* Register_ABS() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::AbsEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SIN() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_COS() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::CosEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOG() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::LogEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SQRT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_RSQRT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::RsqrtEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SQUARE() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SquareEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_NOT() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
|
||||
/*invoke=*/elementwise::LogicalNotEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
54
code/lib/tfmicro/tensorflow/lite/micro/kernels/floor.cc
Normal file
@@ -0,0 +1,54 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/floor.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace floor {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
                       GetTensorShape(output), GetTensorData<float>(output));
  return kTfLiteOk;
}
}  // namespace floor

TfLiteRegistration* Register_FLOOR() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/floor::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
@@ -0,0 +1,233 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace fully_connected {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// The range of the fused activation layer. For example for kNone and
|
||||
// uint8_t these would be 0 and 255.
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
// The index of the temporary tensor where the quantized inputs are cached.
|
||||
int input_quantized_index;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kWeightsTensor = 1;
|
||||
constexpr int kBiasTensor = 2;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context,
|
||||
TfLiteFusedActivation activation,
|
||||
TfLiteType data_type, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output,
|
||||
OpData* data) {
|
||||
TfLiteStatus status = kTfLiteOk;
|
||||
if (data_type != kTfLiteFloat32) {
|
||||
double real_multiplier = 0.0;
|
||||
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
|
||||
context, input, filter, bias, output, &real_multiplier));
|
||||
int exponent;
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
|
||||
data->output_shift = -exponent;
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
void* data = nullptr;
|
||||
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
|
||||
kTfLiteError) {
|
||||
return nullptr;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
const auto params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
|
||||
"Hybrid models are not supported on TFLite Micro.");
|
||||
|
||||
return CalculateOpData(context, params->activation, input->type, input,
|
||||
filter, bias, output, data);
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.weights_offset = -filter->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// TODO(b/138810107): Figure out whether output shift should be inverted
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::FullyConnected(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(filter), GetTensorData<int8_t>(filter),
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter, const TfLiteTensor* bias,
|
||||
TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = input_offset;
|
||||
op_params.weights_offset = filter_offset;
|
||||
op_params.output_offset = output_offset;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
|
||||
op_params.output_shift = -data.output_shift;
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
|
||||
GetTensorShape(filter), GetTensorData<uint8_t>(filter), \
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias), \
|
||||
GetTensorShape(output), GetTensorData<output_data_type>(output))
|
||||
switch (output->type) {
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_FULLY_CONNECTED(uint8_t);
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
TF_LITE_FULLY_CONNECTED(int16_t);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(output->type), output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteFusedActivation activation,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
tflite::reference_ops::FullyConnected(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(filter), GetTensorData<float>(filter),
|
||||
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
const auto* params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
|
||||
// Checks in Prepare ensure input, output and filter types are all the same.
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
return EvalFloat(context, node, params->activation, input, filter, bias,
|
||||
output);
|
||||
case kTfLiteInt8:
|
||||
return EvalQuantizedInt8(context, node, data, input, filter, bias,
|
||||
output);
|
||||
|
||||
case kTfLiteUInt8:
|
||||
return EvalQuantized(context, node, data, input, filter, bias, output);
|
||||
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace fully_connected
|
||||
|
||||
TfLiteRegistration* Register_FULLY_CONNECTED() {
|
||||
static TfLiteRegistration r = {/*init=*/fully_connected::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/fully_connected::Prepare,
|
||||
/*invoke=*/fully_connected::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
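CalculateOpData above converts the real multiplier input_scale * filter_scale / output_scale into a Q31 significand plus a power-of-two exponent via QuantizeMultiplier, then stores the negated exponent as output_shift. The following sketch mirrors that decomposition; it is an illustration only, and DecomposeMultiplier is a made-up name, not the library routine itself.

#include <cmath>
#include <cstdint>

// Illustration only: split a real multiplier into a Q31 significand and an
// exponent such that real ~= significand * 2^(exponent - 31).
void DecomposeMultiplier(double real, int32_t* significand_q31, int* exponent) {
  const double q = std::frexp(real, exponent);  // q is in [0.5, 1).
  int64_t q31 = static_cast<int64_t>(std::round(q * (int64_t{1} << 31)));
  if (q31 == (int64_t{1} << 31)) {  // Rounding pushed q up to exactly 1.0.
    q31 /= 2;
    ++(*exponent);
  }
  *significand_q31 = static_cast<int32_t>(q31);
}

// Example: real = 0.0072 gives exponent = -7 and significand ~= round(0.9216 * 2^31);
// Prepare then stores output_multiplier = significand and, because it negates the
// exponent, output_shift = 7.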
150
code/lib/tfmicro/tensorflow/lite/micro/kernels/l2norm.cc
Normal file
@@ -0,0 +1,150 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace l2norm {
|
||||
|
||||
// This file has two implementation of L2Norm.
|
||||
enum KernelType {
|
||||
kReference,
|
||||
kGenericOptimized,
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(DEBUG)
|
||||
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
|
||||
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
|
||||
output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.));
|
||||
if (output->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128);
|
||||
}
|
||||
if (output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(ahentz): For some reason our implementations don't support
|
||||
// activations.
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
|
||||
#endif
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
// TODO(b/143912164): instead of hardcode the epsilon here, we should read it
|
||||
// from tensorflow, i.e., adding a params.
|
||||
// We don't compute epsilon for quantized kernel:
|
||||
//
|
||||
// epsilon_float = (epsilon_quant - zp) * scale
|
||||
// so
|
||||
// espsilon_quant = epsilon_float / scale + zp
|
||||
// We know epsilon_float is just a very small number to avoid division by
|
||||
// zero error, and scale is > 1, so the integer value of epsilon for quant
|
||||
// is just dominated by the zero point.
|
||||
// Also, GetInvSqrtQuantizedMultiplierExp handles the scenario where the sum
|
||||
// of input value squared is zero case well.
|
||||
// So we don't even need to do handle the epsilon for quantized kernel case.
|
||||
const float epsilon = 1e-6f;
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
#define TF_LITE_L2NORM(type) \
|
||||
tflite::L2NormalizationParams op_params; \
|
||||
op_params.input_zero_point = 0; \
|
||||
type::L2Normalization(op_params, GetTensorShape(input), \
|
||||
GetTensorData<float>(input), GetTensorShape(output), \
|
||||
GetTensorData<float>(output), epsilon)
|
||||
|
||||
TF_LITE_L2NORM(reference_ops);
|
||||
#undef TF_LITE_L2NORM
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
#define TF_LITE_L2NORM(type) \
|
||||
tflite::L2NormalizationParams op_params; \
|
||||
op_params.input_zero_point = input->params.zero_point; \
|
||||
type::L2Normalization(op_params, GetTensorShape(input), \
|
||||
GetTensorData<uint8>(input), GetTensorShape(output), \
|
||||
GetTensorData<uint8>(output))
|
||||
|
||||
TF_LITE_L2NORM(reference_ops);
|
||||
#undef TF_LITE_L2NORM
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
const auto input_shape = GetTensorShape(input);
|
||||
const auto output_shape = GetTensorShape(output);
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
reference_integer_ops::L2Normalization(input->params.zero_point, outer_size,
|
||||
depth, GetTensorData<int8>(input),
|
||||
GetTensorData<int8>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float.",
|
||||
output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace l2norm
|
||||
|
||||
TfLiteRegistration* Register_L2NORM_REF() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/l2norm::Prepare,
|
||||
/*invoke=*/l2norm::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_L2_NORMALIZATION() {
|
||||
return Register_L2NORM_REF();
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
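To make the epsilon comment in Eval above concrete: taking a scale on the order of 1/128 (the output scale enforced in Prepare) as an example, the comment's formula epsilon_quant = epsilon_float / scale + zp gives roughly 1e-6 * 128 + zp, about 0.0001 + zp, which rounds back to the zero point itself; this is why the quantized kernels can skip the epsilon term entirely.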
98
code/lib/tfmicro/tensorflow/lite/micro/kernels/logical.cc
Normal file
@@ -0,0 +1,98 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace logical {
|
||||
namespace {
|
||||
|
||||
// Input/output tensor index.
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
bool (*func)(bool, bool)) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
if (HaveSameShapes(input1, input2)) {
|
||||
reference_ops::BinaryFunction<bool, bool, bool>(
|
||||
GetTensorShape(input1), GetTensorData<bool>(input1),
|
||||
GetTensorShape(input2), GetTensorData<bool>(input2),
|
||||
GetTensorShape(output), GetTensorData<bool>(output), func);
|
||||
} else {
|
||||
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
|
||||
GetTensorShape(input1), GetTensorData<bool>(input1),
|
||||
GetTensorShape(input2), GetTensorData<bool>(input2),
|
||||
GetTensorShape(output), GetTensorData<bool>(output), func);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
bool LogicalOr(bool x, bool y) { return x || y; }
|
||||
|
||||
TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return LogicalImpl(context, node, LogicalOr);
|
||||
}
|
||||
|
||||
bool LogicalAnd(bool x, bool y) { return x && y; }
|
||||
|
||||
TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return LogicalImpl(context, node, LogicalAnd);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace logical
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_OR() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalOrEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_AND() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalAndEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
129
code/lib/tfmicro/tensorflow/lite/micro/kernels/logistic.cc
Normal file
@@ -0,0 +1,129 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t input_zero_point;
|
||||
int32_t input_range_radius;
|
||||
int32_t input_multiplier;
|
||||
int input_left_shift;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
OpData* data) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
std::numeric_limits<int8_t>::min());
|
||||
|
||||
static constexpr int kInputIntegerBits = 4;
|
||||
const double input_real_multiplier =
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(1 << (31 - kInputIntegerBits));
|
||||
|
||||
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
|
||||
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
|
||||
|
||||
data->input_range_radius =
|
||||
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
OpData data;
|
||||
CalculateArithmeticOpData(context, node, &data);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32: {
|
||||
reference_ops::Logistic(
|
||||
GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8: {
|
||||
reference_integer_ops::Logistic(
|
||||
input->params.zero_point, data.input_range_radius,
|
||||
data.input_multiplier, data.input_left_shift,
|
||||
NumElements(input->dims), GetTensorData<int8_t>(input),
|
||||
GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
// TODO(b/141211002): Also support other data types once we have supported
|
||||
// temporary tensors in TFLM.
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_LOGISTIC() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/activations::LogisticEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -0,0 +1,151 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace maximum_minimum {
|
||||
namespace {
|
||||
|
||||
// This file has a reference implementation of TFMaximum/TFMinimum.
|
||||
enum KernelType {
|
||||
kReference,
|
||||
};
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpContext {
|
||||
OpContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
input1 = GetInput(context, node, kInputTensor1);
|
||||
input2 = GetInput(context, node, kInputTensor2);
|
||||
output = GetOutput(context, node, kOutputTensor);
|
||||
}
|
||||
const TfLiteTensor* input1;
|
||||
const TfLiteTensor* input2;
|
||||
TfLiteTensor* output;
|
||||
};
|
||||
|
||||
struct MaximumOp {
|
||||
template <typename data_type>
|
||||
static data_type op(data_type el1, data_type el2) {
|
||||
return el1 > el2 ? el1 : el2;
|
||||
}
|
||||
};
|
||||
|
||||
struct MinimumOp {
|
||||
template <typename data_type>
|
||||
static data_type op(data_type el1, data_type el2) {
|
||||
return el1 < el2 ? el1 : el2;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename data_type, typename op_type>
|
||||
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpContext& op_context) {
|
||||
reference_ops::MaximumMinimumBroadcastSlow(
|
||||
GetTensorShape(op_context.input1),
|
||||
GetTensorData<data_type>(op_context.input1),
|
||||
GetTensorShape(op_context.input2),
|
||||
GetTensorData<data_type>(op_context.input2),
|
||||
GetTensorShape(op_context.output),
|
||||
GetTensorData<data_type>(op_context.output),
|
||||
op_type::template op<data_type>);
|
||||
}
|
||||
|
||||
template <KernelType kernel_type, typename OpType>
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
OpContext op_context(context, node);
|
||||
|
||||
if (kernel_type == kReference) {
|
||||
switch (op_context.output->type) {
|
||||
case kTfLiteFloat32:
|
||||
TFLiteOperation<float, OpType>(context, node, op_context);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
TFLiteOperation<uint8_t, OpType>(context, node, op_context);
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
TFLiteOperation<int8_t, OpType>(context, node, op_context);
|
||||
break;
|
||||
case kTfLiteInt32:
|
||||
TFLiteOperation<int32_t, OpType>(context, node, op_context);
|
||||
break;
|
||||
case kTfLiteInt64:
|
||||
TFLiteOperation<int64_t, OpType>(context, node, op_context);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Type %s (%d) is not supported by Maximum/Minimum.",
|
||||
TfLiteTypeGetName(op_context.output->type),
|
||||
op_context.output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Kernel type not supported by Maximum/Minimum.");
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace maximum_minimum
|
||||
|
||||
TfLiteRegistration* Register_MAXIMUM() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MaximumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_MINIMUM() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MinimumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
89
code/lib/tfmicro/tensorflow/lite/micro/kernels/micro_ops.h
Normal file
@@ -0,0 +1,89 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace ops {
namespace micro {

// Forward declaration of all micro op kernel registration methods. These
// registrations are included with the standard `BuiltinOpResolver`.
//
// This header is particularly useful in cases where only a subset of ops are
// needed. In such cases, the client can selectively add only the registrations
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
// registration in turn allows the linker to strip unused kernels.

TfLiteRegistration* Register_ABS();
TfLiteRegistration* Register_ADD();
TfLiteRegistration* Register_ARG_MAX();
TfLiteRegistration* Register_ARG_MIN();
TfLiteRegistration* Register_AVERAGE_POOL_2D();
TfLiteRegistration* Register_CEIL();
TfLiteRegistration* Register_CIRCULAR_BUFFER();
TfLiteRegistration* Register_CONV_2D();
TfLiteRegistration* Register_CONCATENATION();
TfLiteRegistration* Register_COS();
TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
TfLiteRegistration* Register_DEQUANTIZE();
TfLiteRegistration* Register_EQUAL();
TfLiteRegistration* Register_FLOOR();
TfLiteRegistration* Register_FULLY_CONNECTED();
TfLiteRegistration* Register_GREATER();
TfLiteRegistration* Register_GREATER_EQUAL();
TfLiteRegistration* Register_LESS();
TfLiteRegistration* Register_LESS_EQUAL();
TfLiteRegistration* Register_LOG();
TfLiteRegistration* Register_LOGICAL_AND();
TfLiteRegistration* Register_LOGICAL_NOT();
TfLiteRegistration* Register_LOGICAL_OR();
TfLiteRegistration* Register_LOGISTIC();
TfLiteRegistration* Register_MAXIMUM();
TfLiteRegistration* Register_MAX_POOL_2D();
TfLiteRegistration* Register_MEAN();
TfLiteRegistration* Register_MINIMUM();
TfLiteRegistration* Register_MUL();
TfLiteRegistration* Register_NEG();
TfLiteRegistration* Register_NOT_EQUAL();
TfLiteRegistration* Register_PACK();
TfLiteRegistration* Register_PAD();
TfLiteRegistration* Register_PADV2();
TfLiteRegistration* Register_PRELU();
TfLiteRegistration* Register_QUANTIZE();
TfLiteRegistration* Register_RELU();
TfLiteRegistration* Register_RELU6();
TfLiteRegistration* Register_RESHAPE();
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration* Register_ROUND();
TfLiteRegistration* Register_RSQRT();
TfLiteRegistration* Register_SIN();
TfLiteRegistration* Register_SOFTMAX();
TfLiteRegistration* Register_SPLIT();
TfLiteRegistration* Register_SQRT();
TfLiteRegistration* Register_SQUARE();
TfLiteRegistration* Register_STRIDED_SLICE();
TfLiteRegistration* Register_SUB();
TfLiteRegistration* Register_SVDF();
TfLiteRegistration* Register_UNPACK();
TfLiteRegistration* Register_L2_NORMALIZATION();

}  // namespace micro
}  // namespace ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_OPS_H_
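The header above is the hook for the selective-registration workflow its comment describes: register only the Register_* kernels a model needs instead of the full set. A minimal sketch of that pattern follows, assuming the MicroMutableOpResolver API bundled with this snapshot; the header path and the AddBuiltin signature are assumptions and vary across TFLite Micro releases.

#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Register only the kernels a small quantized model actually uses, so the
// linker can drop every other kernel declared in micro_ops.h.
void RegisterSelectedOps(tflite::MicroMutableOpResolver* resolver) {
  resolver->AddBuiltin(tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
                       tflite::ops::micro::Register_DEPTHWISE_CONV_2D());
  resolver->AddBuiltin(tflite::BuiltinOperator_CONV_2D,
                       tflite::ops::micro::Register_CONV_2D());
  resolver->AddBuiltin(tflite::BuiltinOperator_FULLY_CONNECTED,
                       tflite::ops::micro::Register_FULLY_CONNECTED());
  resolver->AddBuiltin(tflite::BuiltinOperator_SOFTMAX,
                       tflite::ops::micro::Register_SOFTMAX());
}

Such a resolver is then handed to the interpreter in place of an all-ops resolver, which is what makes the linker-level stripping mentioned in the header comment possible.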
37
code/lib/tfmicro/tensorflow/lite/micro/kernels/micro_utils.h
Normal file
@@ -0,0 +1,37 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
namespace tflite {
namespace ops {
namespace micro {

// Same as gtl::Greater but defined here to reduce dependencies and
// binary size for micro environment.
struct Greater {
  template <typename T>
  bool operator()(const T& x, const T& y) const {
    return x > y;
  }
};

struct Less {
  template <typename T>
  bool operator()(const T& x, const T& y) const {
    return x < y;
  }
};

}  // namespace micro
}  // namespace ops
}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
175
code/lib/tfmicro/tensorflow/lite/micro/kernels/mul.cc
Normal file
@@ -0,0 +1,175 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/mul.h"
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace mul {
|
||||
|
||||
constexpr int kInput1Tensor = 0;
|
||||
constexpr int kInput2Tensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
double real_multiplier = static_cast<double>(input1->params.scale) *
|
||||
static_cast<double>(input2->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteMulParams* params, OpData* data,
                   const TfLiteTensor* input1, const TfLiteTensor* input2,
                   TfLiteTensor* output) {
  if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
    tflite::ArithmeticParams op_params;
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    op_params.input1_offset = -input1->params.zero_point;
    op_params.input2_offset = -input2->params.zero_point;
    op_params.output_offset = output->params.zero_point;
    op_params.output_multiplier = data->output_multiplier;
    op_params.output_shift = data->output_shift;
    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
        GetTensorShape(input1), GetTensorShape(input2), &op_params);

#define TF_LITE_MUL(type, opname, dtype)                             \
  type::opname(op_params, GetTensorShape(input1),                    \
               GetTensorData<dtype>(input1), GetTensorShape(input2), \
               GetTensorData<dtype>(input2), GetTensorShape(output), \
               GetTensorData<dtype>(output));

    if (output->type == kTfLiteInt8) {
      if (need_broadcast) {
        TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
      } else {
        TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
      }
    } else if (output->type == kTfLiteUInt8) {
      if (need_broadcast) {
        TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
      } else {
        TF_LITE_MUL(reference_ops, Mul, uint8_t);
      }
    }
#undef TF_LITE_MUL
  }
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteMulParams* params, OpData* data,
               const TfLiteTensor* input1, const TfLiteTensor* input2,
               TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  tflite::ArithmeticParams op_params;
  SetActivationParams(output_activation_min, output_activation_max, &op_params);

  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      GetTensorShape(input1), GetTensorShape(input2), &op_params);
#define TF_LITE_MUL(opname)                                                   \
  reference_ops::opname(op_params, GetTensorShape(input1),                    \
                        GetTensorData<float>(input1), GetTensorShape(input2), \
                        GetTensorData<float>(input2), GetTensorShape(output), \
                        GetTensorData<float>(output));

  if (need_broadcast) {
    TF_LITE_MUL(BroadcastMul4DSlow);
  } else {
    TF_LITE_MUL(Mul);
  }
#undef TF_LITE_MUL
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
  OpData data;

  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  CalculateOpData(context, node, params, &data);

  switch (input1->type) {
    case kTfLiteUInt8:
    case kTfLiteInt8:
      EvalQuantized(context, node, params, &data, input1, input2, output);
      break;
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input1, input2, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input1->type), input1->type);
      return kTfLiteError;
  }

  return kTfLiteOk;
}

}  // namespace mul

TfLiteRegistration* Register_MUL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/mul::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
64
code/lib/tfmicro/tensorflow/lite/micro/kernels/neg.cc
Normal file
@@ -0,0 +1,64 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/neg.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace neg {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  switch (input->type) {
    // TODO(wangtz): handle for kTfLiteInt8
    case kTfLiteFloat32:
      reference_ops::Negate(GetTensorShape(input), GetTensorData<float>(input),
                            GetTensorShape(output),
                            GetTensorData<float>(output));
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace neg

TfLiteRegistration* Register_NEG() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/neg::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
125
code/lib/tfmicro/tensorflow/lite/micro/kernels/pack.cc
Normal file
@@ -0,0 +1,125 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace pack {
namespace {

constexpr int kOutputTensor = 0;

template <typename T>
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
                      TfLiteTensor* output, int values_count, int axis) {
  const int dimensions = output->dims->size;
  const TfLiteTensor* input0 = GetInput(context, node, 0);
  const TfLiteIntArray* input_dims = input0->dims;
  const TfLiteIntArray* output_dims = output->dims;

  if (axis < 0) {
    axis += dimensions;
  }

  int outer_size = 1;
  for (int i = 0; i < axis; ++i) {
    outer_size *= output_dims->data[i];
  }
  int copy_size = 1;
  for (int i = axis + 1; i < dimensions; ++i) {
    copy_size *= output_dims->data[i];
  }
  int input_size = 1;
  for (int i = 0; i < input_dims->size; ++i) {
    input_size *= input_dims->data[i];
  }
  TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);

  T* output_data = GetTensorData<T>(output);

  for (int i = 0; i < values_count; ++i) {
    const TfLiteTensor* t = GetInput(context, node, i);
    const T* input_data = GetTensorData<T>(t);
    for (int k = 0; k < outer_size; ++k) {
      const T* input_ptr = input_data + copy_size * k;
      int loc = k * values_count * copy_size + i * copy_size;
      T* output_ptr = output_data + loc;
      for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
    }
  }

  return kTfLiteOk;
}
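
// Worked example of the index bookkeeping above (shapes chosen only for
// illustration): packing values_count = 3 inputs of shape [2, 4] along
// axis = 1 yields an output of shape [2, 3, 4]. Then outer_size = 2 (output
// dims before the axis), copy_size = 4 (output dims after the axis), and
// input i is written at output offset
// k * values_count * copy_size + i * copy_size for each outer index k, i.e.
// contiguous runs of 4 elements interleaved per input.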

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLitePackParams* data =
      reinterpret_cast<TfLitePackParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  switch (output->type) {
    case kTfLiteFloat32: {
      return PackImpl<float>(context, node, output, data->values_count,
                             data->axis);
    }
    case kTfLiteUInt8: {
      return PackImpl<uint8_t>(context, node, output, data->values_count,
                               data->axis);
    }
    case kTfLiteInt8: {
      return PackImpl<int8_t>(context, node, output, data->values_count,
                              data->axis);
    }
    case kTfLiteInt32: {
      return PackImpl<int32_t>(context, node, output, data->values_count,
                               data->axis);
    }
    case kTfLiteInt64: {
      return PackImpl<int64_t>(context, node, output, data->values_count,
                               data->axis);
    }
    default: {
      TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by pack.",
                         TfLiteTypeGetName(output->type));
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}

}  // namespace
}  // namespace pack

TfLiteRegistration* Register_PACK() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/pack::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
237
code/lib/tfmicro/tensorflow/lite/micro/kernels/pad.cc
Normal file
@@ -0,0 +1,237 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pad.h"

#include <string.h>

#include "tensorflow/lite/kernels/internal/types.h"

#ifdef MEMORY_SANITIZER
#include <sanitizer/msan_interface.h>
#else
#define __msan_check_mem_is_initialized(ptr, size)
#endif

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"

namespace tflite {
namespace ops {
namespace micro {
namespace pad {

struct PadContext {
  PadContext(TfLiteContext* context, TfLiteNode* node) {
    input = GetInput(context, node, 0);
    paddings = GetInput(context, node, 1);
    constant_values = nullptr;
    if (NumInputs(node) == 3) {
      constant_values = GetOptionalInputTensor(context, node, 2);
    } else {
      constant_values = nullptr;
    }
    output = GetOutput(context, node, 0);
    dims = NumDimensions(input);

    resizing_category = ResizingCategory::kGenericResize;
    const int paddings_total = GetTensorShape(paddings).FlatSize();
    const int32* paddings_data = GetTensorData<int32>(paddings);
    // Paddings will be a n,2 array, and we need to detect 4D arrays with the
    // pattern { {0,0}, {a, b}, {c, d}, {0,0} }.
    if (IsConstantTensor(paddings) && paddings_total == 8 &&
        (paddings_data[0] == 0 && paddings_data[1] == 0) &&
        (paddings_data[6] == 0 && paddings_data[7] == 0)) {
      resizing_category = ResizingCategory::kImageStyle;
    }
  }
  const TfLiteTensor* constant_values;
  const TfLiteTensor* input;
  const TfLiteTensor* paddings;
  TfLiteTensor* output;
  int dims;
  ResizingCategory resizing_category;
};
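
// Illustrative example of the "image style" detection above (values are
// arbitrary): for an NHWC input padded only along height and width, the
// paddings tensor has shape [4, 2] and flattens to
// {0, 0, top, bottom, left, right, 0, 0}; the batch and channel entries are
// zero, so the kernel can take the faster PadImageStyle path.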

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  PadContext op_context(context, node);
  TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
  if (op_context.constant_values != nullptr) {
    TF_LITE_ENSURE_EQ(context, op_context.input->type,
                      op_context.constant_values->type);
  }

  // There must be a pair of paddings for each output dimension.
  TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(),
                    op_context.output->dims->size * 2);

  // On Micro, outputs must be properly sized by the converter.
  const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
  for (int i = 0; i < op_context.output->dims->size; i++) {
    int output_dim = op_context.output->dims->data[i];
    int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] +
                       paddings_data[i * 2 + 1];
    TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
  }

  // Current implementations rely on the inputs being <= 4D.
  TF_LITE_ENSURE(
      context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount());
  TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings));
  return kTfLiteOk;
}
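
// Worked example of the quantized padding constraint checked in Eval() below
// (numbers are illustrative only): with uint8 quantization, scale = 0.5 and
// zero_point = 128, the padded region is filled with the value 128, which
// dequantizes to exactly 0.0; when no constant_values tensor is provided, the
// zero_point must therefore lie inside the representable range [0, 255].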

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  PadContext op_context(context, node);

  if (op_context.constant_values != nullptr) {
    // Ensure that constant_values is a scalar.
    TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1);
  }

  // Create before and after padding arrays that are accepted by the kernel.
  const int32* paddings_data = GetTensorData<int32>(op_context.paddings);

  tflite::PadParams op_params;
  memset(&op_params, 0, sizeof(PadParams));
  op_params.left_padding_count = op_context.dims;
  op_params.right_padding_count = op_context.dims;

  for (int idx = op_context.dims - 1; idx >= 0; --idx) {
    op_params.left_padding[idx] = paddings_data[idx * 2];
    op_params.right_padding[idx] = paddings_data[idx * 2 + 1];
  }

#define TF_LITE_PAD(type, op_name, scalar, pad_value)                     \
  const scalar pad_value_copy = pad_value;                                \
                                                                          \
  type::op_name(op_params, GetTensorShape(op_context.input),              \
                GetTensorData<scalar>(op_context.input), &pad_value_copy, \
                GetTensorShape(op_context.output),                        \
                GetTensorData<scalar>(op_context.output))
  switch (op_context.input->type) {
    case kTfLiteFloat32: {
      float pad_value = op_context.constant_values == nullptr
                            ? 0.f
                            : *GetTensorData<float>(op_context.constant_values);
      if (op_context.resizing_category == ResizingCategory::kImageStyle) {
        TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value);
      } else {
        TF_LITE_PAD(reference_ops, Pad, float, pad_value);
      }
    } break;
    case kTfLiteUInt8: {
      uint8_t pad_value;
      if (op_context.constant_values == nullptr) {
        // Quantized Pad requires that 0 is represented in the quantized
        // range.
        TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
                                    std::numeric_limits<uint8_t>::min());
        TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
                                    std::numeric_limits<uint8_t>::max());
        pad_value = static_cast<uint8_t>(op_context.output->params.zero_point);
      } else {
        // Quantized Pad requires that 'constant_values' is represented in the
        // same quantized range as the input and output tensors.
        TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
                          op_context.constant_values->params.zero_point);
        TF_LITE_ENSURE_EQ(
            context, static_cast<double>(op_context.output->params.scale),
            static_cast<double>(op_context.constant_values->params.scale));
        pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
      }
      if (op_context.resizing_category == ResizingCategory::kImageStyle) {
        TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value);
      } else {
        TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value);
      }
    } break;
    case kTfLiteInt8: {
      int8_t pad_value;
      if (op_context.constant_values == nullptr) {
        // Quantized Pad requires that 0 is represented in the quantized
        // range.
        TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
                                    std::numeric_limits<int8_t>::min());
        TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
                                    std::numeric_limits<int8_t>::max());
        pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
      } else {
        // Quantized Pad requires that 'constant_values' is represented in the
        // same quantized range as the input and output tensors.
        TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
                          op_context.constant_values->params.zero_point);
        TF_LITE_ENSURE(context, op_context.output->params.scale ==
                                    op_context.constant_values->params.scale);
        pad_value = *GetTensorData<int8_t>(op_context.constant_values);
      }
      if (op_context.resizing_category == ResizingCategory::kImageStyle) {
        TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
      } else {
        TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
      }
    } break;
    case kTfLiteInt32: {
      int32_t pad_value =
          op_context.constant_values == nullptr
              ? 0
              : *GetTensorData<int32_t>(op_context.constant_values);
      TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value);
    } break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
                         TfLiteTypeGetName(op_context.input->type));
      return kTfLiteError;
  }
#undef TF_LITE_PAD
  return kTfLiteOk;
}

}  // namespace pad

TfLiteRegistration* Register_PAD() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/pad::Prepare,
                                 /*invoke=*/pad::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

// Also register Pad as PadV2.
TfLiteRegistration* Register_PADV2() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/pad::Prepare,
                                 /*invoke=*/pad::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
238
code/lib/tfmicro/tensorflow/lite/micro/kernels/pooling.cc
Normal file
@@ -0,0 +1,238 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pooling.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace micro {
namespace pooling {

namespace {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

struct OpData {
  TfLitePaddingValues padding;
};

TfLiteStatus CalculateOpData(const TfLiteContext* context,
                             const TfLitePoolParams* params,
                             const TfLiteTensor* input,
                             const TfLiteTensor* output, OpData* data) {
  // input: batch, height, width, channel
  int height = SizeOfDimension(input, 1);
  int width = SizeOfDimension(input, 2);

  int out_height, out_width;

  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      /*dilation_rate_height=*/1,
      /*dilation_rate_width=*/1, height, width, params->filter_height,
      params->filter_width, params->padding, &out_height, &out_width);

  return kTfLiteOk;
}
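
// Worked example of the padding computation above (sizes are illustrative):
// for a 16x16 input, a 2x2 filter and stride 2, kTfLitePaddingSame gives an
// output of ceil(16 / 2) = 8 per spatial dimension with no padding pixels
// required, while kTfLitePaddingValid gives floor((16 - 2) / 2) + 1 = 8 as
// well, so both padding values come out as zero in this case.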

void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
                      const TfLitePoolParams* params, const OpData* data,
                      const TfLiteTensor* input, TfLiteTensor* output) {
  float activation_min, activation_max;
  CalculateActivationRange(params->activation, &activation_min,
                           &activation_max);

  PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.float_activation_min = activation_min;
  op_params.float_activation_max = activation_max;
  reference_ops::AveragePool(
      op_params, GetTensorShape(input), GetTensorData<float>(input),
      GetTensorShape(output), GetTensorData<float>(output));
}

void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
                          const TfLitePoolParams* params, const OpData* data,
                          const TfLiteTensor* input, TfLiteTensor* output) {
  TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
  int32_t activation_min, activation_max;
  (void)CalculateActivationRangeQuantized(context, params->activation, output,
                                          &activation_min, &activation_max);

  PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = activation_min;
  op_params.quantized_activation_max = activation_max;

  if (input->type == kTfLiteUInt8) {
    reference_ops::AveragePool(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
  } else {
    reference_integer_ops::AveragePool(
        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
        GetTensorShape(output), GetTensorData<int8_t>(output));
  }
}

void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
                  TfLitePoolParams* params, OpData* data,
                  const TfLiteTensor* input, TfLiteTensor* output) {
  float activation_min, activation_max;
  CalculateActivationRange(params->activation, &activation_min,
                           &activation_max);

  tflite::PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.float_activation_min = activation_min;
  op_params.float_activation_max = activation_max;
  reference_ops::MaxPool(op_params, GetTensorShape(input),
                         GetTensorData<float>(input), GetTensorShape(output),
                         GetTensorData<float>(output));
}

void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
                      TfLitePoolParams* params, OpData* data,
                      const TfLiteTensor* input, TfLiteTensor* output) {
  TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);

  int32_t activation_min, activation_max;
  (void)CalculateActivationRangeQuantized(context, params->activation, output,
                                          &activation_min, &activation_max);

  tflite::PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = activation_min;
  op_params.quantized_activation_max = activation_max;

  if (input->type == kTfLiteUInt8) {
    reference_ops::MaxPool(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
  } else {
    reference_integer_ops::MaxPool(
        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
        GetTensorShape(output), GetTensorData<int8_t>(output));
  }
}
}  // namespace

TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
  OpData data;

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));

  // Inputs and outputs share the same type, guaranteed by the converter.
  switch (input->type) {
    case kTfLiteFloat32:
      AverageEvalFloat(context, node, params, &data, input, output);
      break;
    case kTfLiteUInt8:
    case kTfLiteInt8:
      AverageEvalQuantized(context, node, params, &data, input, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
  return kTfLiteOk;
}

TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
  OpData data;

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));

  switch (input->type) {
    case kTfLiteFloat32:
      MaxEvalFloat(context, node, params, &data, input, output);
      break;
    case kTfLiteUInt8:
    case kTfLiteInt8:
      MaxEvalQuantized(context, node, params, &data, input, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace pooling

TfLiteRegistration* Register_AVERAGE_POOL_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/pooling::AverageEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

TfLiteRegistration* Register_MAX_POOL_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/pooling::MaxEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
121
code/lib/tfmicro/tensorflow/lite/micro/kernels/prelu.cc
Normal file
@@ -0,0 +1,121 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/prelu.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {

TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

inline void BroadcastPrelu4DSlowFloat(
    const RuntimeShape& unextended_input1_shape, const float* input1_data,
    const RuntimeShape& unextended_input2_shape, const float* input2_data,
    const RuntimeShape& unextended_output_shape, float* output_data) {
  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
                                      unextended_input2_shape, &desc1, &desc2);

  for (int b = 0; b < output_shape.Dims(0); ++b) {
    for (int y = 0; y < output_shape.Dims(1); ++y) {
      for (int x = 0; x < output_shape.Dims(2); ++x) {
        for (int c = 0; c < output_shape.Dims(3); ++c) {
          auto out_idx = Offset(output_shape, b, y, x, c);
          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
          auto in1_val = input1_data[in1_idx];
          auto in2_val = input2_data[in2_idx];
          output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
        }
      }
    }
  }
}
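
// For reference, the transform applied above is PReLU(x) = x for x >= 0 and
// PReLU(x) = alpha * x otherwise, with alpha broadcast against the input.
// Example (values illustrative only): x = -2.0 with alpha = 0.25 produces
// -0.5, while x = 3.0 passes through unchanged.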

TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  const TfLiteTensor* alpha = GetInput(context, node, 1);
  TfLiteTensor* output = GetOutput(context, node, 0);
  int32_t output_multiplier = 0;
  int output_shift = 0;
  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
    double real_multiplier = static_cast<double>(input->params.scale) *
                             static_cast<double>(alpha->params.scale) /
                             static_cast<double>(output->params.scale);
    QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
                                        &output_shift);
  }
  switch (input->type) {
    case kTfLiteFloat32: {
      BroadcastPrelu4DSlowFloat(
          GetTensorShape(input), GetTensorData<float>(input),
          GetTensorShape(alpha), GetTensorData<float>(alpha),
          GetTensorShape(output), GetTensorData<float>(output));
      return kTfLiteOk;
    } break;
    case kTfLiteUInt8: {
      PreluParams op_params;
      op_params.input_offset = -input->params.zero_point;
      op_params.alpha_offset = -alpha->params.zero_point;
      op_params.output_offset = output->params.zero_point;
      op_params.output_multiplier = output_multiplier;
      op_params.output_shift = output_shift;
      reference_ops::BroadcastPrelu4DSlow(
          op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
          GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
          GetTensorShape(output), GetTensorData<uint8_t>(output));
      return kTfLiteOk;
    } break;
    default:
      TF_LITE_KERNEL_LOG(
          context, "Only float32 and uint8 are supported currently, got %s.",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
}

}  // namespace activations

TfLiteRegistration* Register_PRELU() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/activations::PreluPrepare,
                                 /*invoke=*/activations::PreluEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
129
code/lib/tfmicro/tensorflow/lite/micro/kernels/quantize.cc
Normal file
@@ -0,0 +1,129 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/quantize.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace quantize {

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);

  // TODO(b/128934713): Add support for fixed-point per-channel quantization.
  // Currently this only supports affine per-layer quantization.
  TF_LITE_ENSURE_EQ(context, output->quantization.type,
                    kTfLiteAffineQuantization);
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
  TF_LITE_ENSURE(context, affine_quantization);
  TF_LITE_ENSURE(context, affine_quantization->scale);
  TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);

  TF_LITE_ENSURE(context,
                 input->type == kTfLiteFloat32 || input->type == kTfLiteInt16);
  TF_LITE_ENSURE(context,
                 output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);

  tflite::QuantizationParams op_params;
  op_params.zero_point = output->params.zero_point;
  op_params.scale = static_cast<double>(output->params.scale);

  if (input->type == kTfLiteFloat32) {
    switch (output->type) {
      case kTfLiteInt8:
        reference_ops::AffineQuantize(
            op_params, GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<int8_t>(output));
        break;
      case kTfLiteUInt8:
        reference_ops::AffineQuantize(
            op_params, GetTensorShape(input), GetTensorData<float>(input),
            GetTensorShape(output), GetTensorData<uint8_t>(output));
        break;
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                           TfLiteTypeGetName(input->type),
                           TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else if (input->type == kTfLiteInt16) {
    size_t size = ElementCount(*input->dims);
    int32_t output_multiplier;
    int output_shift;
    double effective_scale =
        static_cast<double>(input->params.scale / output->params.scale);
    switch (output->type) {
      case kTfLiteInt8:
        QuantizeMultiplier(effective_scale, &output_multiplier, &output_shift);
        reference_ops::Requantize(
            GetTensorData<int16_t>(input), size, output_multiplier,
            output_shift, input->params.zero_point, output->params.zero_point,
            GetTensorData<int8_t>(output));
        break;
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                           TfLiteTypeGetName(input->type),
                           TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else {
    TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                       TfLiteTypeGetName(input->type),
                       TfLiteTypeGetName(output->type));
    return kTfLiteError;
  }

  return kTfLiteOk;
}

}  // namespace quantize

// This Op (QUANTIZE) quantizes the input and produces quantized output.
// AffineQuantize takes scale and zero point and quantizes the float value to
// quantized output, in int8 or uint8 format.
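// Worked example of the affine mapping (numbers illustrative only): with
// scale = 0.5 and zero_point = -128 for an int8 output, a float input of 3.2
// maps to round(3.2 / 0.5) + (-128) = 6 - 128 = -122, clamped to [-128, 127].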
TfLiteRegistration* Register_QUANTIZE() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/quantize::Prepare,
                                 /*invoke=*/quantize::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
135
code/lib/tfmicro/tensorflow/lite/micro/kernels/reduce.cc
Normal file
@@ -0,0 +1,135 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/reduce.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace reduce {

constexpr int kMaxNumberOfAxis = 4;
constexpr int kMaxNumberOfReducedAxis = 2;

TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
  // Inputs Tensor (dtype depends on quantization):
  // [0] = Input
  // [1] = Axis

  // Outputs Tensor (dtype depends on quantization):
  // [0] = Output

  // Validate number of inputs and outputs
  TF_LITE_ENSURE_EQ(context, node->inputs->size, 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Validate axis type
  const TfLiteTensor* axis = GetInput(context, node, 1);
  TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
  return kTfLiteOk;
}

TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
  // TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
  return kTfLiteOk;
}

void ResolveAxis(const int* axis_data, int axis_count,
                 tflite::MeanParams* op_params) {
  int i = 0;
  for (; i < axis_count; ++i) {
    op_params->axis[i] = static_cast<int16>(axis_data[i]);
  }
  for (; i < 4; ++i) {
    op_params->axis[i] = 1;
  }
  op_params->axis_count = axis_count;
}
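
// Typical usage of the axis resolution above (shapes are illustrative): for a
// 4D NHWC input of shape [1, 7, 7, 64] with an axis tensor of {1, 2} and
// keep_dims = true, Mean reduces over height and width and produces a
// [1, 1, 1, 64] output, i.e. a global average pool. This is also the only
// axis combination the float path in EvalMean() accepts at the moment.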

TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  const TfLiteTensor* axis = GetInput(context, node, 1);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TfLiteReducerParams* params =
      reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);

  int num_axis = static_cast<int>(NumElements(axis));
  int temp_index[kMaxNumberOfAxis];
  int resolved_axis[kMaxNumberOfReducedAxis];

  switch (input->type) {
    case kTfLiteFloat32: {
      tflite::MeanParams op_params;
      ResolveAxis(GetTensorData<int>(axis), num_axis, &op_params);
      // TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
      // scratch tensor allocation has been implemented in (b/132070898)
      bool is_valid_inputs =
          (NumDimensions(input) == 4 && op_params.axis_count == 2 &&
           ((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
            (op_params.axis[0] == 2 && op_params.axis[1] == 1)));
      TF_LITE_ENSURE_MSG(
          context, is_valid_inputs == true,
          "Number of Input "
          "dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
      // TODO(b/139102329): Handle the below special case in the combined
      // reference method.
      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
      if (params->keep_dims) {
        reference_ops::Mean(op_params, GetTensorShape(input),
                            GetTensorData<float>(input), GetTensorShape(output),
                            GetTensorData<float>(output));
      } else {
        TF_LITE_ENSURE(
            context,
            reference_ops::Mean(GetTensorData<float>(input), input->dims->data,
                                input->dims->size, GetTensorData<float>(output),
                                output->dims->data, output->dims->size,
                                GetTensorData<int>(axis), num_axis,
                                params->keep_dims, temp_index, resolved_axis,
                                GetTensorData<float>(output)));
      }
    } break;
    default:
      // TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
      TF_LITE_ENSURE_MSG(context, false,
                         "Currently, only float32 input type "
                         "is supported.");
  }
  return kTfLiteOk;
}
}  // namespace reduce

TfLiteRegistration* Register_MEAN() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/reduce::PrepareMeanOrSum,
                                 /*invoke=*/reduce::EvalMean,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}
}  // namespace micro
}  // namespace ops
}  // namespace tflite
106
code/lib/tfmicro/tensorflow/lite/micro/kernels/reshape.cc
Normal file
@@ -0,0 +1,106 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"

namespace tflite {
namespace ops {
namespace micro {
namespace reshape {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  // TensorFlow's Reshape allows one of the shape components to have the
  // special -1 value, meaning it will be calculated automatically based on the
  // input. Here we calculate what that dimension should be so that the number
  // of output elements is the same as the number of input elements.
  int num_input_elements = NumElements(input);
  TfLiteIntArray* output_shape = output->dims;

  if (NumInputs(node) == 1 &&  // Legacy scalar supported with params.
      output_shape->size == 1 && output_shape->data[0] == 0) {
    // Legacy tflite models use a shape parameter of [0] to indicate scalars,
    // so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during
    // toco conversion.
    output_shape->size = 0;
  }

  int num_output_elements = 1;
  int stretch_dim = -1;
  for (int i = 0; i < output_shape->size; ++i) {
    int value = output_shape->data[i];
    if (value == -1) {
      TF_LITE_ENSURE_EQ(context, stretch_dim, -1);
      stretch_dim = i;
    } else {
      num_output_elements *= value;
    }
  }
  if (stretch_dim != -1) {
    output_shape->data[stretch_dim] = num_input_elements / num_output_elements;
    num_output_elements *= output_shape->data[stretch_dim];
  }

  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
  return kTfLiteOk;
}
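
// Worked example of the stretch dimension above (shapes illustrative): for an
// input with 24 elements and a requested shape of {-1, 6}, the fixed
// dimensions multiply to 6, so the -1 entry is resolved to 24 / 6 = 4 and the
// output shape becomes {4, 6}.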

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  // Do nothing for in-place reshape.
  if (input->data.raw != output->data.raw) {
    // Otherwise perform reshape with copy.
    for (size_t i = 0; i < input->bytes; ++i) {
      output->data.raw[i] = input->data.raw[i];
    }
  }
  return kTfLiteOk;
}

}  // namespace reshape

TfLiteRegistration* Register_RESHAPE() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/reshape::Prepare,
                                 /*invoke=*/reshape::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
@@ -0,0 +1,112 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"

namespace tflite {
namespace ops {
namespace micro {
namespace resize_nearest_neighbor {

constexpr int kInputTensor = 0;
constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
#if defined(DEBUG)
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  // Our current implementations rely on the input being 4D,
  // and the size being 1D tensor with exactly 2 elements.
  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
  TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
  TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
  TF_LITE_ENSURE_EQ(context, size->dims->data[0], 2);

  output->type = input->type;

  if (!IsConstantTensor(size)) {
    TF_LITE_KERNEL_LOG(context,
                       "Dynamic tensors are unsupported in tfmicro.");
    return kTfLiteError;
  }
#endif
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  tflite::ResizeNearestNeighborParams op_params;
  op_params.align_corners = params->align_corners;
  op_params.half_pixel_centers = false;
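
  // A note on the float branch below: it appears to read and write the data
  // as int32. Since nearest-neighbor resizing only copies element values and
  // float and int32 are both 32 bits wide, this reinterpretation does not
  // change the result; it presumably lets the same reference kernel
  // instantiation be reused for the float case.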
  if (output->type == kTfLiteFloat32) {
    reference_ops::ResizeNearestNeighbor(
        op_params, GetTensorShape(input), GetTensorData<int32>(input),
        GetTensorShape(size), GetTensorData<int32>(size),
        GetTensorShape(output), GetTensorData<int32>(output));
  } else if (output->type == kTfLiteUInt8) {
    reference_ops::ResizeNearestNeighbor(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(size), GetTensorData<int32>(size),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
  } else if (output->type == kTfLiteInt8) {
    reference_ops::ResizeNearestNeighbor(
        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
        GetTensorShape(size), GetTensorData<int32>(size),
        GetTensorShape(output), GetTensorData<int8_t>(output));
  } else {
    TF_LITE_KERNEL_LOG(context,
                       "Output type is %d, requires float, uint8 or int8.",
                       output->type);
    return kTfLiteError;
  }

  return kTfLiteOk;
}
}  // namespace resize_nearest_neighbor

TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/resize_nearest_neighbor::Prepare,
                                 /*invoke=*/resize_nearest_neighbor::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
70
code/lib/tfmicro/tensorflow/lite/micro/kernels/round.cc
Normal file
@@ -0,0 +1,70 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/round.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace round {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
  TF_LITE_ENSURE_EQ(context, output->type, input->type);
  TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
  TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
  for (int i = 0; i < output->dims->size; ++i) {
    TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
  }
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  reference_ops::Round(GetTensorShape(input), GetTensorData<float>(input),
                       GetTensorShape(output), GetTensorData<float>(output));

  return kTfLiteOk;
}
}  // namespace round

TfLiteRegistration* Register_ROUND() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/round::Prepare,
                                 /*invoke=*/round::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
Some files were not shown because too many files have changed in this diff.