Mirror of https://github.com/jomjol/AI-on-the-edge-device.git

Commit: Update tflite
@@ -67,8 +67,9 @@ typedef struct {
typedef enum {
kTfLiteActNone = 0,
kTfLiteActRelu,
kTfLiteActRelu1, // min(max(-1, x), 1)
kTfLiteActRelu6, // min(max(0, x), 6)
kTfLiteActReluN1To1, // min(max(-1, x), 1)
kTfLiteActRelu1 = kTfLiteActReluN1To1, // kTfLiteActRelu1 will be deprecated.
kTfLiteActRelu6, // min(max(0, x), 6)
kTfLiteActTanh,
kTfLiteActSignBit,
kTfLiteActSigmoid,
@@ -198,6 +199,8 @@ typedef struct {

typedef struct {
TfLiteFusedActivation activation;
// Parameter added for the version 4.
bool pot_scale_int16;
} TfLiteAddParams;

typedef struct {
@@ -219,6 +222,8 @@ typedef struct {

typedef struct {
TfLiteFusedActivation activation;
// Parameter added for the version 5.
bool pot_scale_int16;
} TfLiteSubParams;

typedef struct {
@@ -297,6 +302,7 @@ typedef struct {

typedef struct {
bool align_corners;
bool half_pixel_centers;
} TfLiteResizeNearestNeighborParams;

typedef struct {
@@ -459,6 +465,15 @@ typedef struct {
int body_subgraph_index;
} TfLiteWhileParams;

typedef struct {
bool exclusive;
bool reverse;
} TfLiteCumsumParams;

typedef struct {
int init_subgraph_index;
} TfLiteCallOnceParams;

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

@@ -79,7 +79,8 @@ TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }

void TfLiteTensorDataFree(TfLiteTensor* t) {
if (t->allocation_type == kTfLiteDynamic) {
if (t->allocation_type == kTfLiteDynamic ||
t->allocation_type == kTfLitePersistentRo) {
free(t->data.raw);
}
t->data.raw = NULL;
@@ -172,7 +173,8 @@ void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
}

void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
if (tensor->allocation_type != kTfLiteDynamic) {
if (tensor->allocation_type != kTfLiteDynamic &&
tensor->allocation_type != kTfLitePersistentRo) {
return;
}
// TODO(b/145340303): Tensor data should be aligned.
@@ -205,6 +207,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {
return "BOOL";
case kTfLiteComplex64:
return "COMPLEX64";
case kTfLiteComplex128:
return "COMPLEX128";
case kTfLiteString:
return "STRING";
case kTfLiteFloat16:

@@ -29,6 +29,9 @@ limitations under the License.
// TfLiteDelegate - allows delegation of nodes to alternative backends.
//
// Some abstractions in this file are created and managed by Interpreter.
//
// NOTE: The order of values in these structs are "semi-ABI stable". New values
// should be added only to the end of structs and never reordered.

#ifndef TENSORFLOW_LITE_C_COMMON_H_
#define TENSORFLOW_LITE_C_COMMON_H_
@@ -43,8 +46,18 @@ extern "C" {

typedef enum TfLiteStatus {
kTfLiteOk = 0,

// Generally referring to an error in the runtime (i.e. interpreter)
kTfLiteError = 1,
kTfLiteDelegateError = 2

// Generally referring to an error from a TfLiteDelegate itself.
kTfLiteDelegateError = 2,

// Generally referring to an error in applying a delegate due to
// incompatibility between runtime and delegate, e.g., this error is returned
// when trying to apply a TfLite delegate onto a model graph that's already
// immutable.
kTfLiteApplicationError = 3
} TfLiteStatus;

// The list of external context types known to TF Lite. This list exists solely
@@ -55,7 +68,7 @@ typedef enum TfLiteExternalContextType {
kTfLiteEigenContext = 0, // include eigen_support.h to use.
kTfLiteGemmLowpContext = 1, // include gemm_support.h to use.
kTfLiteEdgeTpuContext = 2, // Placeholder for Edge TPU support.
kTfLiteCpuBackendContext = 3, // include cpu_backend_support.h to use.
kTfLiteCpuBackendContext = 3, // include cpu_backend_context.h to use.
kTfLiteMaxExternalContexts = 4
} TfLiteExternalContextType;

@@ -83,8 +96,9 @@ typedef struct TfLiteIntArray {
int size;
// gcc 6.1+ have a bug where flexible members aren't properly handled
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
__GNUC_MINOR__ >= 1
#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
__GNUC_MINOR__ >= 1) || \
defined(HEXAGON) || (__clang_major__ == 7 && __clang_minor__ == 1)
int data[0];
#else
int data[];
@@ -122,6 +136,7 @@ typedef struct TfLiteFloatArray {
int size;
// gcc 6.1+ have a bug where flexible members aren't properly handled
// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
// This also applies to the toolchain used for Qualcomm Hexagon DSPs.
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
__GNUC_MINOR__ >= 1
float data[0];
@@ -200,6 +215,7 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
// the current function, while also reporting the location of the error.
// `a` and `b` may be evaluated more than once, so no side effects or
// extremely expensive computations should be done.
// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
#define TF_LITE_ENSURE_EQ(context, a, b) \
do { \
if ((a) != (b)) { \
@@ -219,6 +235,17 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
} \
} while (0)

#define TF_LITE_ENSURE_NEAR(context, a, b, epsilon) \
do { \
auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a)); \
if (delta > epsilon) { \
TF_LITE_KERNEL_LOG((context), "%s:%d %s not near %s (%f != %f)", \
__FILE__, __LINE__, #a, #b, static_cast<double>(a), \
static_cast<double>(b)); \
return kTfLiteError; \
} \
} while (0)

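A minimal usage sketch for the TF_LITE_ENSURE_NEAR macro added above, assuming a hypothetical kernel Prepare function and illustrative scale values:

    // The macro logs file/line via TF_LITE_KERNEL_LOG and returns kTfLiteError
    // from the enclosing function when |a - b| exceeds epsilon.
    TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
      const float input_scale = 0.5f;      // illustrative only
      const float output_scale = 0.5001f;  // illustrative only
      TF_LITE_ENSURE_NEAR(context, input_scale, output_scale, 1e-3);
      return kTfLiteOk;
    }
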
#define TF_LITE_ENSURE_OK(context, status) \
do { \
const TfLiteStatus s = (status); \
@@ -227,11 +254,32 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
} \
} while (0)

// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
// library.
#ifdef SWIG
#define TFL_CAPI_EXPORT
#else
#if defined(_WIN32)
#ifdef TFL_COMPILE_LIBRARY
#define TFL_CAPI_EXPORT __declspec(dllexport)
#else
#define TFL_CAPI_EXPORT __declspec(dllimport)
#endif // TFL_COMPILE_LIBRARY
#else
#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
#endif // _WIN32
#endif // SWIG

// Single-precision complex data type compatible with the C99 definition.
typedef struct TfLiteComplex64 {
float re, im; // real and imaginary parts, respectively.
} TfLiteComplex64;

// Double-precision complex data type compatible with the C99 definition.
typedef struct TfLiteComplex128 {
double re, im; // real and imaginary parts, respectively.
} TfLiteComplex128;

// Half precision data type compatible with the C99 definition.
typedef struct TfLiteFloat16 {
uint16_t data;
@@ -251,6 +299,7 @@ typedef enum {
kTfLiteInt8 = 9,
kTfLiteFloat16 = 10,
kTfLiteFloat64 = 11,
kTfLiteComplex128 = 12,
} TfLiteType;

// Return the name of a given type, for error reporting purposes.
@@ -307,26 +356,39 @@ typedef union TfLitePtrUnion {
int64_t* i64;
float* f;
TfLiteFloat16* f16;
double* f64;
char* raw;
const char* raw_const;
uint8_t* uint8;
bool* b;
int16_t* i16;
TfLiteComplex64* c64;
TfLiteComplex128* c128;
int8_t* int8;
/* Only use this member. */
void* data;
} TfLitePtrUnion;

// Memory allocation strategies. kTfLiteMmapRo is for read-only memory-mapped
// data (or data externally allocated). kTfLiteArenaRw is arena allocated
// data. kTfLiteDynamic is for tensors that are allocated during evaluation.
// Memory allocation strategies.
// * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
// * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
// and available during eval.
// * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
// only available during eval.
// * kTfLiteDynamic: Allocated during eval, or for string tensors.
// * kTfLitePersistentRo: Allocated and populated during prepare. This is
// useful for tensors that can be computed during prepare and treated
// as constant inputs for downstream ops (also in prepare).
// * kTfLiteCustom: Custom memory allocation provided by the user. See
// TfLiteCustomAllocation below.
typedef enum TfLiteAllocationType {
kTfLiteMemNone = 0,
kTfLiteMmapRo,
kTfLiteArenaRw,
kTfLiteArenaRwPersistent,
kTfLiteDynamic,
kTfLitePersistentRo,
kTfLiteCustom,
} TfLiteAllocationType;

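A short sketch of how kernel code might branch on these allocation types, for instance to decide whether a tensor's contents can already be read at Prepare time; the helper name is hypothetical:

    // Data in kTfLiteMmapRo tensors is constant, and data in kTfLitePersistentRo
    // tensors is filled in during prepare, so both can be read before Eval.
    inline bool IsConstantOrPersistentTensor(const TfLiteTensor* tensor) {
      return tensor->allocation_type == kTfLiteMmapRo ||
             tensor->allocation_type == kTfLitePersistentRo;
    }
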
// The delegates should use zero or positive integers to represent handles.
@@ -359,8 +421,18 @@ typedef struct TfLiteSparsity {
int dim_metadata_size;
} TfLiteSparsity;

// An tensor in the interpreter system which is a wrapper around a buffer of
// Defines a custom memory allocation not owned by the runtime.
// `data` should be aligned to kDefaultTensorAlignment defined in
// lite/util.h. (Currently 64 bytes)
// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage.
typedef struct TfLiteCustomAllocation {
void* data;
size_t bytes;
} TfLiteCustomAllocation;

// A tensor in the interpreter system which is a wrapper around a buffer of
// data including a dimensionality (or NULL if not currently defined).
#ifndef TF_LITE_STATIC_MEMORY
typedef struct TfLiteTensor {
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
@@ -426,31 +498,6 @@ typedef struct TfLiteTensor {
const TfLiteIntArray* dims_signature;
} TfLiteTensor;

#ifndef TF_LITE_STATIC_MEMORY
// Free data memory of tensor `t`.
void TfLiteTensorDataFree(TfLiteTensor* t);

// Free quantization data.
void TfLiteQuantizationFree(TfLiteQuantization* quantization);

// Free sparsity parameters.
void TfLiteSparsityFree(TfLiteSparsity* sparsity);

// Free memory of tensor `t`.
void TfLiteTensorFree(TfLiteTensor* t);

// Set all of a tensor's fields (and free any previously allocated data).
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
TfLiteQuantizationParams quantization, char* buffer,
size_t size, TfLiteAllocationType allocation_type,
const void* allocation, bool is_variable,
TfLiteTensor* tensor);

// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
// types other than kTfLiteDynamic will be ignored.
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
#endif // TF_LITE_STATIC_MEMORY

// A structure representing an instance of a node.
// This structure only exhibits the inputs, outputs and user defined data, not
// other features like the type.
@@ -487,6 +534,130 @@ typedef struct TfLiteNode {
// WARNING: This is an experimental interface that is subject to change.
struct TfLiteDelegate* delegate;
} TfLiteNode;
#else // defined(TF_LITE_STATIC_MEMORY)?
// NOTE: This flag is opt-in only at compile time.
//
// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
// contains only the minimum fields required to initialize and prepare a micro
// inference graph. The fields in this struct have been ordered from
// largest-to-smallest for optimal struct sizeof.
//
// This struct does not use:
// - allocation
// - buffer_handle
// - data_is_stale
// - delegate
// - dims_signature
// - name
// - sparsity
typedef struct TfLiteTensor {
// TODO(b/155784997): Consider consolidating these quantization fields:
// Quantization information. Replaces params field above.
TfLiteQuantization quantization;

// Quantization information.
TfLiteQuantizationParams params;

// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;

// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have. NOTE: the product of elements of `dims`
// and the element datatype size should be equal to `bytes` below.
TfLiteIntArray* dims;

// The number of bytes required to store the data of this Tensor. I.e.
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
// type is kTfLiteFloat32 and dims = {3, 2} then
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
size_t bytes;

// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;

// How memory is mapped
// kTfLiteMmapRo: Memory mapped read only.
// i.e. weights
// kTfLiteArenaRw: Arena allocated read write memory
// (i.e. temporaries, outputs).
TfLiteAllocationType allocation_type;

// True if the tensor is a variable.
bool is_variable;
} TfLiteTensor;

// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
// only the minimum fields required to represent a node.
//
// This struct does not use:
// - delegate
// - intermediates
// - temporaries
typedef struct TfLiteNode {
// Inputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* inputs;

// Outputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* outputs;

// Opaque data provided by the node implementer through `Registration.init`.
void* user_data;

// Opaque data provided to the node if the node is a builtin. This is usually
// a structure defined in builtin_op_data.h
void* builtin_data;

// Custom initial data. This is the opaque data provided in the flatbuffer.
// WARNING: This is an experimental interface that is subject to change.
const void* custom_initial_data;
int custom_initial_data_size;
} TfLiteNode;
#endif // TF_LITE_STATIC_MEMORY

// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount
// of information required for a kernel to run during TfLiteRegistration::Eval.
// TODO(b/160955687): Move this field into TF_LITE_STATIC_MEMORY when TFLM
// builds with this flag by default internally.
typedef struct TfLiteEvalTensor {
// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;

// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have.
TfLiteIntArray* dims;

// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;
} TfLiteEvalTensor;

#ifndef TF_LITE_STATIC_MEMORY
// Free data memory of tensor `t`.
void TfLiteTensorDataFree(TfLiteTensor* t);

// Free quantization data.
void TfLiteQuantizationFree(TfLiteQuantization* quantization);

// Free sparsity parameters.
void TfLiteSparsityFree(TfLiteSparsity* sparsity);

// Free memory of tensor `t`.
void TfLiteTensorFree(TfLiteTensor* t);

// Set all of a tensor's fields (and free any previously allocated data).
void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
TfLiteQuantizationParams quantization, char* buffer,
size_t size, TfLiteAllocationType allocation_type,
const void* allocation, bool is_variable,
TfLiteTensor* tensor);

// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
// types other than kTfLiteDynamic will be ignored.
void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
#endif // TF_LITE_STATIC_MEMORY

// WARNING: This is an experimental interface that is subject to change.
//
@@ -578,12 +749,11 @@ typedef struct TfLiteContext {
void* profiler;

// Allocate persistent buffer which has the same life time as the interpreter.
// Returns nullptr on failure.
// The memory is allocated from heap for TFL, and from tail in TFLM.
// If *ptr is not nullptr, the pointer will be reallocated.
// This method is only available in Prepare stage.
// This method is only available in Init or Prepare stage.
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*AllocatePersistentBuffer)(struct TfLiteContext* ctx,
size_t bytes, void** ptr);
void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);

// Allocate a buffer which will be deallocated right after invoke phase.
// The memory is allocated from heap in TFL, and from volatile arena in TFLM.
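A sketch of the new AllocatePersistentBuffer signature as a kernel's Init stage might call it; OpData is a hypothetical per-op struct:

    // With the new signature the buffer pointer is returned directly instead of
    // being passed back through a void** out-parameter.
    void* Init(TfLiteContext* context, const char* buffer, size_t length) {
      struct OpData {
        int32_t output_multiplier;
        int output_shift;
      };
      return context->AllocatePersistentBuffer(context, sizeof(OpData));
    }
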
@@ -638,6 +808,18 @@ typedef struct TfLiteContext {
TfLiteStatus (*PreviewDelegatePartitioning)(
struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
TfLiteDelegateParams** partition_params_array, int* num_partitions);

// Returns a TfLiteTensor struct for a given index.
// WARNING: This is an experimental interface that is subject to change.
// WARNING: This method may not be available on all platforms.
TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
int tensor_idx);

// Returns a TfLiteEvalTensor struct for a given index.
// WARNING: This is an experimental interface that is subject to change.
// WARNING: This method may not be available on all platforms.
TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
int tensor_idx);
} TfLiteContext;

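A sketch of a kernel fetching tensors through the accessors declared above, assuming the runtime has populated the function pointers:

    // Eval-stage lookup via the context accessors; the kernel body is elided.
    TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
      const int input_idx = node->inputs->data[0];
      TfLiteEvalTensor* input = context->GetEvalTensor(context, input_idx);
      const float* input_data = input->data.f;
      (void)input_data;  // ... kernel arithmetic would use input_data ...
      return kTfLiteOk;
    }
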
typedef struct TfLiteRegistration {
@@ -712,7 +894,26 @@ typedef enum TfLiteDelegateFlags {
//
// If the delegate isn't capable to handle dynamic tensors, this flag need
// to be set to false.
kTfLiteDelegateFlagsAllowDynamicTensors = 1
kTfLiteDelegateFlagsAllowDynamicTensors = 1,

// This flag can be used by delegates (that allow dynamic tensors) to ensure
// applicable tensor shapes are automatically propagated in the case of tensor
// resizing.
// This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
// of a delegate kernel will have correct shapes before its Prepare() method
// is called. The runtime leverages TFLite builtin ops in the original
// execution plan to propagate shapes.
//
// A few points to note:
// 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
// false, this one is redundant since the delegate kernels are re-initialized
// every time tensors are resized.
// 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
// work is required to prepare the original execution plan.
// 3. This flag requires that the original execution plan only have ops with
// valid registrations (and not 'dummy' custom ops like with Flex).
// WARNING: This feature is experimental and subject to change.
kTfLiteDelegateFlagsRequirePropagatedShapes = 2
} TfLiteDelegateFlags;

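A sketch of a delegate opting into shape propagation by combining the two flags as a bitmask; members of TfLiteDelegate other than flags are omitted here:

    // Shape propagation only has an effect when dynamic tensors are also
    // allowed (see point 1 above).
    TfLiteDelegate my_delegate = {};  // Prepare, CopyFromBufferHandle, ... elided
    my_delegate.flags = kTfLiteDelegateFlagsAllowDynamicTensors |
                        kTfLiteDelegateFlagsRequirePropagatedShapes;
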
// WARNING: This is an experimental interface that is subject to change.
@@ -731,8 +932,9 @@ typedef struct TfLiteDelegate {
struct TfLiteDelegate* delegate);

// Copy the data from delegate buffer handle into raw memory of the given
// 'tensor'. This cannot be null. The delegate is allowed to allocate the raw
// bytes as long as it follows the rules for kTfLiteDynamic tensors.
// 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
// long as it follows the rules for kTfLiteDynamic tensors, in which case this
// cannot be null.
TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
struct TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,

File diff suppressed because it is too large

@@ -19,9 +19,12 @@ limitations under the License.
// flatbuffer serialization format into in-memory values that are used by the
// runtime API and interpreter.

#include <cstddef>
#include <new>
#include <type_traits>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {
@@ -42,7 +45,7 @@ class BuiltinDataAllocator {
// platform targets support that properly.
static_assert(std::is_pod<T>::value, "Builtin data structure must be POD.");
void* allocated_memory = this->Allocate(sizeof(T), alignof(T));
return new (allocated_memory) T;
return new (allocated_memory) T();
}

virtual ~BuiltinDataAllocator() {}
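The switch from `new (allocated_memory) T` to `new (allocated_memory) T()` changes default-initialization into value-initialization, so members of the POD builtin param structs start out zeroed instead of indeterminate. A standalone C++ illustration with a hypothetical PlainStruct:

    #include <new>

    struct PlainStruct { int a; float b; };  // POD, like the builtin param structs

    void Illustrate(void* storage) {
      PlainStruct* p1 = new (storage) PlainStruct;    // a and b are indeterminate
      p1->~PlainStruct();
      PlainStruct* p2 = new (storage) PlainStruct();  // a == 0, b == 0.0f
      p2->~PlainStruct();
    }
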
@@ -66,6 +69,196 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
ErrorReporter* error_reporter);

TfLiteStatus ParseAbs(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseConcatenation(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseConv2D(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseCos(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseDepthwiseConv2D(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseDequantize(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseFloor(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseFullyConnected(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseGreater(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseGreaterEqual(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseL2Normalization(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseLess(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseLessEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseLog(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseLogicalAnd(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseLogicalNot(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseLogicalOr(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseLogistic(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseNeg(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseNotEqual(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParsePack(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParsePad(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParsePadV2(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParsePool(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParsePrelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseRelu(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseRelu6(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseReshape(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseResizeBilinear(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseResizeNearestNeighbor(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseRound(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseSin(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseSoftmax(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseSplitV(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseStridedSlice(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);

TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseSvdf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseTanh(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);

} // namespace tflite

#endif // TENSORFLOW_LITE_CORE_API_FLATBUFFER_CONVERSIONS_H_

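A sketch of driving one of these parsers, assuming (it is not shown in this diff) that BuiltinDataAllocator's pure virtual methods are Allocate(size, alignment_hint) and Deallocate(data); the MallocDataAllocator name is hypothetical:

    #include <cstdlib>

    // Hypothetical allocator backed by malloc/free, used to hold the parsed
    // TfLiteConvParams produced by ParseConv2D.
    class MallocDataAllocator : public tflite::BuiltinDataAllocator {
     public:
      void* Allocate(size_t size, size_t alignment_hint) override {
        return malloc(size);
      }
      void Deallocate(void* data) override { free(data); }
    };

    TfLiteStatus ParseConvOptions(const tflite::Operator* op,
                                  tflite::ErrorReporter* error_reporter,
                                  void** builtin_data) {
      MallocDataAllocator allocator;
      return tflite::ParseConv2D(op, error_reporter, &allocator, builtin_data);
    }
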
@@ -15,6 +15,11 @@ limitations under the License.

#include "tensorflow/lite/core/api/op_resolver.h"

#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_utils.h"

namespace tflite {

TfLiteStatus GetRegistrationFromOpCode(
@@ -22,7 +27,7 @@ TfLiteStatus GetRegistrationFromOpCode(
ErrorReporter* error_reporter, const TfLiteRegistration** registration) {
TfLiteStatus status = kTfLiteOk;
*registration = nullptr;
auto builtin_code = opcode->builtin_code();
auto builtin_code = GetBuiltinCode(opcode);
int version = opcode->version();

if (builtin_code > BuiltinOperator_MAX ||

@@ -15,6 +15,8 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
#define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_

#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@@ -32,6 +34,16 @@ class OpResolver {
/// Finds the op registration of a custom operator by op name.
virtual const TfLiteRegistration* FindOp(const char* op,
int version) const = 0;

// Returns optional delegates for resolving and handling ops in the flatbuffer
// model. This may be used in addition to the standard TfLiteRegistration
// lookup for graph resolution.
using TfLiteDelegatePtrVector =
std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
return TfLiteDelegatePtrVector();
}

virtual ~OpResolver() {}
};

code/lib/tfmicro/tensorflow/lite/core/api/profiler.h (new file, 194 lines)
@@ -0,0 +1,194 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_CORE_API_PROFILER_H_
#define TENSORFLOW_LITE_CORE_API_PROFILER_H_

#include <cstdint>

namespace tflite {

// A simple utility for enabling profiled event tracing in TensorFlow Lite.
class Profiler {
public:
// As certain Profiler instance might be only interested in certain event
// types, we define each event type value to allow a Profiler to use
// bitmasking bitwise operations to determine whether an event should be
// recorded or not.
enum class EventType {
// Default event type, the metadata field has no special significance.
DEFAULT = 1,

// The event is an operator invocation and the event_metadata field is the
// index of operator node.
OPERATOR_INVOKE_EVENT = 2,

// The event is an invocation for an internal operator of a TFLite delegate.
// The event_metadata field is the index of operator node that's specific to
// the delegate.
DELEGATE_OPERATOR_INVOKE_EVENT = 4,

// The event is a recording of runtime instrumentation such as the overall
// TFLite runtime status, the TFLite delegate status (if a delegate
// is applied), and the overall model inference latency etc.
// Note, the delegate status and overall status are stored as separate
// event_metadata fields. In particular, the delegate status is encoded
// as DelegateStatus::full_status().
GENERAL_RUNTIME_INSTRUMENTATION_EVENT = 8,
};

virtual ~Profiler() {}

// Signals the beginning of an event and returns a handle to the profile
// event. The `event_metadata1` and `event_metadata2` have different
// interpretations based on the actual Profiler instance and the `event_type`.
// For example, as for the 'SubgraphAwareProfiler' defined in
// lite/core/subgraph.h, when the event_type is OPERATOR_INVOKE_EVENT,
// `event_metadata1` represents the index of a TFLite node, and
// `event_metadata2` represents the index of the subgraph that this event
// comes from.
virtual uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata1,
int64_t event_metadata2) = 0;
// Similar w/ the above, but `event_metadata2` defaults to 0.
uint32_t BeginEvent(const char* tag, EventType event_type,
int64_t event_metadata) {
return BeginEvent(tag, event_type, event_metadata, /*event_metadata2*/ 0);
}

// Signals an end to the specified profile event with 'event_metadata's, This
// is useful when 'event_metadata's are not available when the event begins
// or when one wants to overwrite the 'event_metadata's set at the beginning.
virtual void EndEvent(uint32_t event_handle, int64_t event_metadata1,
int64_t event_metadata2) {}
// Signals an end to the specified profile event.
virtual void EndEvent(uint32_t event_handle) = 0;

// Appends an event of type 'event_type' with 'tag' and 'event_metadata'
// which started at 'start' and ended at 'end'
// Note:
// In cases were ProfileSimmarizer and tensorflow::StatsCalculator are used
// they assume the value is in "usec", if in any case subclasses
// didn't put usec, then the values are not meaningful.
// TODO karimnosseir: Revisit and make the function more clear.
void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata) {
AddEvent(tag, event_type, start, end, event_metadata,
/*event_metadata2*/ 0);
}

virtual void AddEvent(const char* tag, EventType event_type, uint64_t start,
uint64_t end, int64_t event_metadata1,
int64_t event_metadata2) {}

protected:
friend class ScopedProfile;
};

// Adds a profile event to `profiler` that begins with the construction
// of the object and ends when the object goes out of scope.
// The lifetime of tag should be at least the lifetime of `profiler`.
// `profiler` may be null, in which case nothing is profiled.
class ScopedProfile {
public:
ScopedProfile(Profiler* profiler, const char* tag,
Profiler::EventType event_type = Profiler::EventType::DEFAULT,
int64_t event_metadata = 0)
: profiler_(profiler), event_handle_(0) {
if (profiler) {
event_handle_ = profiler_->BeginEvent(tag, event_type, event_metadata);
}
}

~ScopedProfile() {
if (profiler_) {
profiler_->EndEvent(event_handle_);
}
}

protected:
Profiler* profiler_;
uint32_t event_handle_;
};

class ScopedOperatorProfile : public ScopedProfile {
public:
ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index)
: ScopedProfile(profiler, tag, Profiler::EventType::OPERATOR_INVOKE_EVENT,
static_cast<uint32_t>(node_index)) {}
};

class ScopedDelegateOperatorProfile : public ScopedProfile {
public:
ScopedDelegateOperatorProfile(Profiler* profiler, const char* tag,
int node_index)
: ScopedProfile(profiler, tag,
Profiler::EventType::DELEGATE_OPERATOR_INVOKE_EVENT,
static_cast<uint32_t>(node_index)) {}
};

class ScopedRuntimeInstrumentationProfile : public ScopedProfile {
public:
ScopedRuntimeInstrumentationProfile(Profiler* profiler, const char* tag)
: ScopedProfile(
profiler, tag,
Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, -1) {}

void set_runtime_status(int64_t delegate_status, int64_t interpreter_status) {
if (profiler_) {
delegate_status_ = delegate_status;
interpreter_status_ = interpreter_status;
}
}

~ScopedRuntimeInstrumentationProfile() {
if (profiler_) {
profiler_->EndEvent(event_handle_, delegate_status_, interpreter_status_);
}
}

private:
int64_t delegate_status_;
int64_t interpreter_status_;
};

} // namespace tflite

#define TFLITE_VARNAME_UNIQ_IMPL(name, ctr) name##ctr
#define TFLITE_VARNAME_UNIQ(name, ctr) TFLITE_VARNAME_UNIQ_IMPL(name, ctr)

#define TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler, tag) \
tflite::ScopedProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
(profiler), (tag))

#define TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler, tag, node_index) \
tflite::ScopedOperatorProfile TFLITE_VARNAME_UNIQ(_profile_, __COUNTER__)( \
(profiler), (tag), (node_index))

#define TFLITE_SCOPED_DELEGATE_OPERATOR_PROFILE(profiler, tag, node_index) \
tflite::ScopedDelegateOperatorProfile TFLITE_VARNAME_UNIQ( \
_profile_, __COUNTER__)((profiler), (tag), (node_index))

#define TFLITE_ADD_RUNTIME_INSTRUMENTATION_EVENT( \
profiler, tag, delegate_status, interpreter_status) \
do { \
if (!profiler) { \
const auto handle = profiler->BeginEvent( \
tag, Profiler::EventType::GENERAL_RUNTIME_INSTRUMENTATION_EVENT, \
delegate_status, interpreter_status); \
profiler->EndEvent(handle); \
} \
} while (false);

#endif // TENSORFLOW_LITE_CORE_API_PROFILER_H_

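A usage sketch for the scoped-profiling macros defined above; the wrapper function and tag are illustrative:

    // The scoped object begins an OPERATOR_INVOKE_EVENT on construction and ends
    // it when the scope is left; a null profiler simply disables profiling.
    void RunConvNode(tflite::Profiler* profiler, int node_index) {
      TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler, "CONV_2D", node_index);
      // ... invoke the kernel here ...
    }
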
@@ -17,6 +17,8 @@ limitations under the License.

#include <string.h>

#include "tensorflow/lite/c/common.h"

namespace tflite {

TfLiteStatus ResetVariableTensor(TfLiteTensor* tensor) {

@@ -55,9 +55,12 @@ inline void GetActivationMinMax(FusedActivationFunctionType ac,
}
}

inline float ActivationFunctionWithMinMax(float x, float output_activation_min,
float output_activation_max) {
return std::min(std::max(x, output_activation_min), output_activation_max);
template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min,
T output_activation_max) {
using std::max;
using std::min;
return min(max(x, output_activation_min), output_activation_max);
}

// Legacy function, left for compatibility only.
@@ -135,23 +138,24 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
#endif
}

inline int32 MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32 x, int32 quantized_multiplier, int left_shift) {
inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
int32_t x, int32_t quantized_multiplier, int left_shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
return RoundingDivideByPOT(
SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift);
}

inline int32 MultiplyByQuantizedMultiplierGreaterThanOne(
int32 x, int32 quantized_multiplier, int left_shift) {
inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
int32_t x, int32_t quantized_multiplier, int left_shift) {
using gemmlowp::SaturatingRoundingDoublingHighMul;
return SaturatingRoundingDoublingHighMul(x * (1 << left_shift),
quantized_multiplier);
}

inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
int shift) {
inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
int32_t quantized_multiplier,
int shift) {
using gemmlowp::RoundingDivideByPOT;
using gemmlowp::SaturatingRoundingDoublingHighMul;
int left_shift = shift > 0 ? shift : 0;
@@ -161,16 +165,16 @@ inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
right_shift);
}

inline int32 MultiplyByQuantizedMultiplier(int64_t x,
int32 quantized_multiplier,
int shift) {
inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
int32_t quantized_multiplier,
int shift) {
// Inputs:
// - quantized_multiplier has fixed point at bit 31
// - shift is -31 to +7 (negative for right shift)
//
// Assumptions: The following input ranges are assumed
// - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1)
// - scaling is chosen so final scaled result fits in int32
// - scaling is chosen so final scaled result fits in int32_t
// - input x is in the range -(1<<47) <= x < (1<<47)
assert(quantized_multiplier >= 0);
assert(shift >= -31 && shift < 8);
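For reference, a hedged sketch of the real-valued quantity these routines approximate, under the stated fixed-point-at-bit-31 convention (e.g. a quantized_multiplier of 1 << 30 encodes 0.5, so with shift == 0 the result is roughly x / 2):

    #include <cmath>
    #include <cstdint>

    // Floating-point reference for MultiplyByQuantizedMultiplier, assuming the
    // real multiplier is quantized_multiplier / 2^31 and the result is then
    // scaled by 2^shift.
    double QuantizedMultiplyReference(int64_t x, int32_t quantized_multiplier,
                                      int shift) {
      const double m = static_cast<double>(quantized_multiplier) / (1ull << 31);
      return static_cast<double>(x) * m * std::ldexp(1.0, shift);
    }
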
@@ -215,9 +219,9 @@ inline int CountLeadingSignBits(T integer_input) {
using U = typename std::make_unsigned<T>::type;
return integer_input >= 0
? CountLeadingZeros(static_cast<U>(integer_input)) - 1
: integer_input != std::numeric_limits<T>::min()
? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
: 0;
: integer_input != std::numeric_limits<T>::min()
? CountLeadingZeros(2 * static_cast<U>(-integer_input) - 1)
: 0;
#endif
}

@@ -237,8 +241,12 @@ inline Integer FloorLog2(Integer n) {

// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
// softmax
inline void gen_lut(const std::function<double(double)>& func, double min,
double max, int16_t* table, const int num) {
// func - the function to build the LUT for (e.g exp(x))
// min,max - table limits
// table - pointer to buffer
// num - number of elements in the LUT
inline void gen_lut(double (*func)(double), double min, double max,
int16_t* table, const int num) {
// size of table should equal to num + 1
// last element only for slope calculation
double step = (max - min) / (num - 1);
@@ -259,7 +267,35 @@ inline void gen_lut(const std::function<double(double)>& func, double min,
std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
}

// int16 func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
// softmax
// func - the function to build the LUT for (e.g exp(x))
// min,max - table limits
// table - pointer to buffer
// num - number of elements in the LUT
inline void gen_lut(float (*func)(float), float min, float max, int16_t* table,
const int num) {
// size of table should equal to num + 1
// last element only for slope calculation
float step = (max - min) / (num - 1);
float half_step = step / 2.0f;
for (int i = 0; i < num - 1; i++) {
float sample_val = TfLiteRound(func(min + i * step) * 32768.0f);
float midpoint_interp_val =
TfLiteRound((func(min + (i + 1) * step) * 32768.0f +
TfLiteRound(func(min + i * step) * 32768.0f)) /
2.0f);
float midpoint_val =
TfLiteRound(func(min + i * step + half_step) * 32768.0f);
float midpoint_err = midpoint_interp_val - midpoint_val;
float bias = TfLiteRound(midpoint_err / 2.0f);
table[i] = std::min(std::max(sample_val - bias, -32768.0f), 32767.0f);
}
table[num - 1] = std::min(
std::max(TfLiteRound(func(max) * 32768.0f), -32768.0f), 32767.0f);
}

// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
// 512 base value, lut[513] only for calculate slope
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
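A usage sketch for the new float overload of gen_lut together with generic_int16_table_lookup; the table size, limits, and lookup value are illustrative:

    #include <cmath>

    // Build an int16 LUT for exp(x) over [-10, 0] and query it; the last table
    // entry is only used for slope calculation, as noted above.
    int16_t exp_lut[513];
    void BuildAndUseLut() {
      gen_lut([](float x) -> float { return std::exp(x); }, -10.0f, 0.0f,
              exp_lut, 513);
      int16_t y = generic_int16_table_lookup(/*value=*/-1024, exp_lut);
      (void)y;
    }
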
@@ -410,6 +446,23 @@ SaturatingRoundingMultiplyByPOTParam(
SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent));
}

// Convert int32_t multiplier to int16_t with rounding.
inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t,
int16_t* multiplier_int16_t) {
TFLITE_DCHECK_GE(multiplier_int32_t, 0);
static constexpr int32_t kRoundingOffset = 1 << 15;
if (multiplier_int32_t >=
std::numeric_limits<int32_t>::max() - kRoundingOffset) {
*multiplier_int16_t = std::numeric_limits<int16_t>::max();
return;
}
const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16;
TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset);
TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset);
*multiplier_int16_t = result;
TFLITE_DCHECK_EQ(*multiplier_int16_t, result);
}

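A worked instance of the rounding above, as a sanity check rather than library code:

    // For multiplier_int32_t = 1 << 30 (0.5 in Q1.31) the rounded result is
    // (1073741824 + 32768) >> 16 = 16384, i.e. 0.5 in Q1.15.
    void DownScaleExample() {
      int16_t m16 = 0;
      DownScaleInt32ToInt16Multiplier(1 << 30, &m16);
      // m16 == 16384 at this point.
    }
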
// Minimum output bits to accommodate log of maximum input range. It actually
// does not matter if one considers, say, [-64,64] or [-64,64).
//
@@ -418,15 +471,13 @@ SaturatingRoundingMultiplyByPOTParam(
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ...
// ceil(log(abs( log(2.^(0:127))+1 ))/log(2))]
constexpr int min_log_x_output_bits(int input_bits) {
return input_bits > 90
? 7
: input_bits > 44
? 6
: input_bits > 21
? 5
: input_bits > 10
? 4
: input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1;
return input_bits > 90 ? 7
: input_bits > 44 ? 6
: input_bits > 21 ? 5
: input_bits > 10 ? 4
: input_bits > 4 ? 3
: input_bits > 1 ? 2
: 1;
}

// Although currently the name of this function says that it cannot handle
@@ -434,17 +485,17 @@ constexpr int min_log_x_output_bits(int input_bits) {
// x_max is the largest representable input. In other words, the output range
// is symmetric.
template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1_impl(
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32>::digits - 1);
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32>::digits);
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
// assert(__builtin_clz(0u) >= std::numeric_limits<uint32_t>::digits - 1);
// assert(__builtin_clz(0u) <= std::numeric_limits<uint32_t>::digits);
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
// The reason for accumulating the result with an extra bit of headroom is
// that z_pow_2_adj * log_2 might be saturated, and adding num_scaled *
// recip_denom will otherwise introduce an error.
static constexpr int kAccumIntegerBits = OutputIntegerBits + 1;
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumIntegerBits>;
using FixedPointAccum = gemmlowp::FixedPoint<int32_t, kAccumIntegerBits>;

const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(
FixedPoint0, 1488522236, std::log(2.0));
@@ -472,10 +523,10 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// required shift "ourselves" instead of using, say, Rescale.
FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw());
// z_a_pow_2 = input_integer_bits - z_a_headroom;
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32>(z_a.raw()));
int z_a_headroom_plus_1 = CountLeadingZeros(static_cast<uint32_t>(z_a.raw()));
FixedPoint0 r_a_tmp =
SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1));
const int32 r_a_raw =
const int32_t r_a_raw =
SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1);
// z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25);
// z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25,
@@ -487,8 +538,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl(

// z_b is treated like z_a, but premultiplying by sqrt(0.5).
FixedPoint0 z_b = z_a * sqrt_half;
int z_b_headroom = CountLeadingZeros(static_cast<uint32>(z_b.raw())) - 1;
const int32 r_b_raw =
int z_b_headroom = CountLeadingZeros(static_cast<uint32_t>(z_b.raw())) - 1;
const int32_t r_b_raw =
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
@@ -516,9 +567,9 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
}

template <int OutputIntegerBits, int InputIntegerBits>
inline gemmlowp::FixedPoint<int32, OutputIntegerBits>
inline gemmlowp::FixedPoint<int32_t, OutputIntegerBits>
log_x_for_x_greater_than_or_equal_to_1(
gemmlowp::FixedPoint<int32, InputIntegerBits> input_val) {
gemmlowp::FixedPoint<int32_t, InputIntegerBits> input_val) {
static_assert(
OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits),
"Output integer bits must be sufficient to accommodate logs of inputs.");
@@ -527,25 +578,25 @@ log_x_for_x_greater_than_or_equal_to_1(
input_val);
}

inline int32 GetReciprocal(int32 x, int x_integer_digits,
int* num_bits_over_unit) {
int headroom_plus_one = CountLeadingZeros(static_cast<uint32>(x));
inline int32_t GetReciprocal(int32_t x, int x_integer_digits,
int* num_bits_over_unit) {
int headroom_plus_one = CountLeadingZeros(static_cast<uint32_t>(x));
// This is the number of bits to the left of the binary point above 1.0.
// Consider x=1.25. In that case shifted_scale=0.8 and
// no later adjustment will be needed.
*num_bits_over_unit = x_integer_digits - headroom_plus_one;
const int32 shifted_sum_minus_one =
static_cast<int32>((static_cast<uint32>(x) << headroom_plus_one) -
(static_cast<uint32>(1) << 31));
const int32_t shifted_sum_minus_one =
static_cast<int32_t>((static_cast<uint32_t>(x) << headroom_plus_one) -
(static_cast<uint32_t>(1) << 31));

gemmlowp::FixedPoint<int32, 0> shifted_scale =
gemmlowp::FixedPoint<int32_t, 0> shifted_scale =
gemmlowp::one_over_one_plus_x_for_x_in_0_1(
gemmlowp::FixedPoint<int32, 0>::FromRaw(shifted_sum_minus_one));
gemmlowp::FixedPoint<int32_t, 0>::FromRaw(shifted_sum_minus_one));
return shifted_scale.raw();
}

inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
int32* output_inv_sqrt,
inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
int32_t* output_inv_sqrt,
int* output_shift) {
TFLITE_DCHECK_GE(input, 0);
if (input <= 1) {
@@ -565,7 +616,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
++*output_shift;
}
const unsigned max_left_shift_bits =
CountLeadingZeros(static_cast<uint32>(input)) - 1;
CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
*output_shift -= left_shift_bit_pairs;
@@ -577,8 +628,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift,
using gemmlowp::SaturatingRoundingMultiplyByPOT;
// Using 3 integer bits gives us enough room for the internal arithmetic in
// this Newton-Raphson iteration.
using F3 = FixedPoint<int32, 3>;
using F0 = FixedPoint<int32, 0>;
using F3 = FixedPoint<int32_t, 3>;
using F0 = FixedPoint<int32_t, 0>;
const F3 fixedpoint_input = F3::FromRaw(input >> 1);
const F3 fixedpoint_half_input =
SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
@@ -645,6 +696,13 @@ inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) {
indexes[4] * desc.strides[4];
}

inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) {
return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] +
indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] +
indexes[4] * desc.strides[4] + indexes[5] * desc.strides[5] +
indexes[6] * desc.strides[6] + indexes[7] * desc.strides[7];
}

// Given the dimensions of the operands for an element-wise binary broadcast,
// adjusts them so that they can be directly iterated over with simple loops.
// Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and

@@ -76,13 +76,15 @@ limitations under the License.
#define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT
#endif

// TODO(ahentz): Clean up.
#ifndef TF_LITE_STATIC_MEMORY
// TODO(b/162019032): Consider removing these type-aliases.
using int8 = std::int8_t;
using uint8 = std::uint8_t;
using int16 = std::int16_t;
using uint16 = std::uint16_t;
using int32 = std::int32_t;
using uint32 = std::uint32_t;
#endif // !defined(TF_LITE_STATIC_MEMORY)

// TFLITE_DEPRECATED()
//

@@ -19,8 +19,9 @@ limitations under the License.

namespace tflite {

#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO)
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
defined(__ZEPHYR__)
#define TF_LITE_GLOBAL_STD_PREFIX
#else
#define TF_LITE_GLOBAL_STD_PREFIX std

@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,16 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Optional debugging functionality. For small sized binaries, these are not
// needed.
#ifndef TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#define TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_

#include "tensorflow/lite/micro/micro_interpreter.h"
#include <cmath>

namespace tflite {
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);

#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
inline float TfLiteMax(const float& x, const float& y) {
return std::max(x, y);
}
#else
template <class T>
inline T TfLiteMax(const T& x, const T& y) {
return std::fmax(x, y);
}
#endif

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_MICRO_OPTIONAL_DEBUG_TOOLS_H_
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
code/lib/tfmicro/tensorflow/lite/kernels/internal/min.h (new file, 35 lines)
@@ -0,0 +1,35 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_

#include <cmath>

namespace tflite {

#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
inline float TfLiteMin(const float& x, const float& y) {
return std::min(x, y);
}
#else
template <class T>
inline T TfLiteMin(const T& x, const T& y) {
return std::fmin(x, y);
}
#endif

} // namespace tflite

#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
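The TfLiteMax and TfLiteMin wrappers added above pick std::max/std::min on Zephyr-style builds and std::fmax/std::fmin elsewhere, so kernel code gets one portable spelling. A minimal usage sketch follows; the ClampActivation helper and the include paths are illustrative assumptions, not part of this commit.

#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"

// Hypothetical helper: clamp a float activation into
// [activation_min, activation_max] using the portable wrappers above.
inline float ClampActivation(float value, float activation_min,
                             float activation_max) {
  return tflite::TfLiteMax(activation_min,
                           tflite::TfLiteMin(value, activation_max));
}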
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_

#include <complex>
#include <vector>
@@ -21,7 +21,6 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/string_util.h"

namespace tflite {

@@ -76,12 +75,12 @@ class VectorOfTensors {

// A list of quantized tensors in a format that can be used by kernels like
// split and concatenation.
class VectorOfQuantizedTensors : public VectorOfTensors<uint8> {
class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t> {
public:
// Build with the tensors in 'tensor_list'.
VectorOfQuantizedTensors(const TfLiteContext& context,
const TfLiteIntArray& tensor_list)
: VectorOfTensors<uint8>(context, tensor_list) {
: VectorOfTensors<uint8_t>(context, tensor_list) {
for (int i = 0; i < tensor_list.size; ++i) {
TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
zero_point_.push_back(t->params.zero_point);
@@ -90,10 +89,10 @@ class VectorOfQuantizedTensors : public VectorOfTensors<uint8> {
}

const float* scale() const { return scale_.data(); }
const int32* zero_point() const { return zero_point_.data(); }
const int32_t* zero_point() const { return zero_point_.data(); }

private:
std::vector<int32> zero_point_;
std::vector<int32_t> zero_point_;
std::vector<float> scale_;
};

@@ -119,26 +118,6 @@ class SequentialTensorWriter {
T* output_ptr_;
};

template <>
class SequentialTensorWriter<string> {
public:
SequentialTensorWriter(const TfLiteTensor* input, TfLiteTensor* output)
: input_(input), output_(output) {}
~SequentialTensorWriter() { buffer_.WriteToTensor(output_, nullptr); }

void Write(int position) { this->WriteN(position, 1); }
void WriteN(int position, int len) {
for (int i = 0; i < len; i++) {
buffer_.AddString(GetString(input_, position + i));
}
}

private:
const TfLiteTensor* input_;
TfLiteTensor* output_;
DynamicBuffer buffer_;
};

} // namespace tflite

#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_H_
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_H_
@@ -342,13 +342,13 @@ void NudgeQuantizationRange(const float min, const float max,
const float quant_max_float = static_cast<float>(quant_max);
*nudged_scale = (max - min) / (quant_max_float - quant_min_float);
const float zero_point_from_min = quant_min_float - min / *nudged_scale;
uint16 nudged_zero_point;
uint16_t nudged_zero_point;
if (zero_point_from_min < quant_min_float) {
nudged_zero_point = static_cast<uint16>(quant_min);
nudged_zero_point = static_cast<uint16_t>(quant_min);
} else if (zero_point_from_min > quant_max_float) {
nudged_zero_point = static_cast<uint16>(quant_max);
nudged_zero_point = static_cast<uint16_t>(quant_max);
} else {
nudged_zero_point = static_cast<uint16>(TfLiteRound(zero_point_from_min));
nudged_zero_point = static_cast<uint16_t>(TfLiteRound(zero_point_from_min));
}
*nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale);
*nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale);

@@ -51,34 +51,39 @@ inline void Add(const ArithmeticParams& params,

// Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add.

// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void AddElementwise(int size, const ArithmeticParams& params,
const uint8* input1_data, const uint8* input2_data,
uint8* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);
const T* input1_data, const T* input2_data,
T* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());

for (int i = 0; i < size; ++i) {
const int32 input1_val = params.input1_offset + input1_data[i];
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8>(clamped_output);
output_data[i] = static_cast<T>(clamped_output);
}
}

@@ -86,40 +91,40 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
// broadcast add, so that, for example, scalar-broadcast with batch will still
// be fast.
inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
uint8 input1_data, const uint8* input2_data,
uint8* output_data) {
uint8_t input1_data, const uint8_t* input2_data,
uint8_t* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256);
TFLITE_DCHECK_GT(params.input2_offset, -256);
TFLITE_DCHECK_LT(params.input1_offset, 256);
TFLITE_DCHECK_LT(params.input2_offset, 256);

const int32 input1_val = params.input1_offset + input1_data;
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = params.input1_offset + input1_data;
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
for (int i = 0; i < size; ++i) {
const int32 input2_val = params.input2_offset + input2_data[i];
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
const int32 scaled_input2_val =
const int32_t input2_val = params.input2_offset + input2_data[i];
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8>(clamped_output);
output_data[i] = static_cast<uint8_t>(clamped_output);
}
}

inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8* input1_data,
const RuntimeShape& input2_shape, const uint8* input2_data,
const RuntimeShape& output_shape, uint8* output_data) {
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
@@ -132,24 +137,53 @@ inline void Add(const ArithmeticParams& params,
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void AddGeneralParamScale(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int16_t* input1_data,
const RuntimeShape& input2_shape,
const int16_t* input2_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);

int max_value = std::numeric_limits<int16_t>::max();

TFLITE_DCHECK_GT(params.input1_offset, -max_value);
TFLITE_DCHECK_GT(params.input2_offset, -max_value);
TFLITE_DCHECK_LT(params.input1_offset, max_value);
TFLITE_DCHECK_LT(params.input2_offset, max_value);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}

inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16* input1_data,
const RuntimeShape& input2_shape, const int16* input2_data,
const RuntimeShape& output_shape, int16* output_data) {
const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int16_t* output_data,
bool pot_scale = true) {
if (!pot_scale) {
AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
return;
}

TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);

const int input1_shift = params.input1_shift;
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
const int16 output_activation_min = params.quantized_activation_min;
const int16 output_activation_max = params.quantized_activation_max;
const int16_t output_activation_min = params.quantized_activation_min;
const int16_t output_activation_max = params.quantized_activation_max;

TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
TFLITE_DCHECK_LE(input1_shift, 0);
TFLITE_DCHECK_LE(params.input2_shift, 0);
const int16* not_shift_input = input1_shift == 0 ? input1_data : input2_data;
const int16* shift_input = input1_shift == 0 ? input2_data : input1_data;
const int16_t* not_shift_input =
input1_shift == 0 ? input1_data : input2_data;
const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data;
const int input_right_shift =
input1_shift == 0 ? -params.input2_shift : -input1_shift;

@@ -161,8 +195,8 @@ inline void Add(const ArithmeticParams& params,
F0 scaled_input = F0::FromRaw(
gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled);
const int16 raw_output = result.raw();
const int16 clamped_output = std::min(
const int16_t raw_output = result.raw();
const int16_t clamped_output = std::min(
output_activation_max, std::max(output_activation_min, raw_output));
output_data[i] = clamped_output;
}
@@ -218,11 +252,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,

inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32* input1_data,
const int32_t* input1_data,
const RuntimeShape& input2_shape,
const int32* input2_data,
const int32_t* input2_data,
const RuntimeShape& output_shape,
int32* output_data) {
int32_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -257,13 +291,14 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
}
}

inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const uint8* input1_data,
const RuntimeShape& input2_shape,
const uint8* input2_data,
const RuntimeShape& output_shape,
uint8* output_data) {
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void BroadcastAdd4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -286,34 +321,34 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
const int32 input1_val =
const int32_t input1_val =
params.input1_offset +
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
const int32 input2_val =
const int32_t input2_val =
params.input2_offset +
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
const int32 shifted_input1_val =
const int32_t shifted_input1_val =
input1_val * (1 << params.left_shift);
const int32 shifted_input2_val =
const int32_t shifted_input2_val =
input2_val * (1 << params.left_shift);
const int32 scaled_input1_val =
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier,
params.input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier,
params.input2_shift);
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
const int32 raw_output =
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
raw_sum, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32 clamped_output =
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<uint8>(clamped_output);
static_cast<T>(clamped_output);
}
}
}
@@ -322,11 +357,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,

inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
const RuntimeShape& unswitched_input1_shape,
const uint8* unswitched_input1_data,
const uint8_t* unswitched_input1_data,
const RuntimeShape& unswitched_input2_shape,
const uint8* unswitched_input2_data,
const uint8_t* unswitched_input2_data,
const RuntimeShape& output_shape,
uint8* output_data) {
uint8_t* output_data) {
ArithmeticParams switched_params = unswitched_params;
switched_params.input1_offset = unswitched_params.input2_offset;
switched_params.input1_multiplier = unswitched_params.input2_multiplier;
@@ -341,18 +376,18 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,

const ArithmeticParams& params =
use_unswitched ? unswitched_params : switched_params;
const uint8* input1_data =
const uint8_t* input1_data =
use_unswitched ? unswitched_input1_data : unswitched_input2_data;
const uint8* input2_data =
const uint8_t* input2_data =
use_unswitched ? unswitched_input2_data : unswitched_input1_data;

// Fivefold nested loops. The second input resets its position for each
// iteration of the second loop. The first input resets its position at the
// beginning of the fourth loop. The innermost loop is an elementwise add of
// sections of the arrays.
uint8* output_data_ptr = output_data;
const uint8* input1_data_ptr = input1_data;
const uint8* input2_data_reset = input2_data;
uint8_t* output_data_ptr = output_data;
const uint8_t* input1_data_ptr = input1_data;
const uint8_t* input2_data_reset = input2_data;
// In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
// between input shapes. y3 for input 1 is always broadcast, and so the
// dimension there is 1, whereas optionally y1 might be broadcast for input 2.
@@ -368,7 +403,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
// General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
// dimension.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8* input2_data_ptr;
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {
@@ -397,7 +432,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
// for y4 == 1 and the loop over y3 is contained within the
// AddScalarBroadcast function.
for (int i0 = 0; i0 < y0; ++i0) {
const uint8* input2_data_ptr;
const uint8_t* input2_data_ptr;
for (int i1 = 0; i1 < y1; ++i1) {
input2_data_ptr = input2_data_reset;
for (int i2 = 0; i2 < y2; ++i2) {

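The quantized Add paths above all follow the same shape: shift each input by its offset, rescale both onto a shared higher-precision intermediate, add, requantize with the output multiplier, then clamp to the activation range. The sketch below restates that flow with plain float scales instead of the integer-only MultiplyByQuantizedMultiplierSmallerThanOneExp helper; the function name and parameters are illustrative only, not TFLite API.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Simplified requantized add: real kernels use fixed-point multipliers and
// shifts, this hypothetical sketch uses float scales to keep the idea visible.
inline uint8_t AddQuantizedSketch(uint8_t a, uint8_t b, int32_t input1_offset,
                                  float input1_scale, int32_t input2_offset,
                                  float input2_scale, int32_t output_offset,
                                  float inv_output_scale, int32_t quantized_min,
                                  int32_t quantized_max) {
  // Shift both inputs by their offsets and map them onto a common real scale.
  const float real_sum = (a + input1_offset) * input1_scale +
                         (b + input2_offset) * input2_scale;
  // Requantize into the output domain, add the output offset, then clamp.
  const int32_t raw_output =
      static_cast<int32_t>(std::lround(real_sum * inv_output_scale)) +
      output_offset;
  return static_cast<uint8_t>(
      std::min(quantized_max, std::max(quantized_min, raw_output)));
}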
@@ -18,7 +18,6 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/string_util.h"

namespace tflite {

@@ -51,18 +50,6 @@ inline bool LessEqualFn(T lhs, T rhs) {
return lhs <= rhs;
}

inline bool StringRefEqualFn(const StringRef& lhs, const StringRef& rhs) {
if (lhs.len != rhs.len) return false;
for (int i = 0; i < lhs.len; ++i) {
if (lhs.str[i] != rhs.str[i]) return false;
}
return true;
}

inline bool StringRefNotEqualFn(const StringRef& lhs, const StringRef& rhs) {
return !StringRefEqualFn(lhs, rhs);
}

template <typename T>
using ComparisonFn = bool (*)(T, T);

@@ -78,22 +65,6 @@ inline void ComparisonImpl(
}
}

template <bool (*F)(const StringRef&, const StringRef&)>
inline void ComparisonStringImpl(const RuntimeShape& input1_shape,
const TfLiteTensor* input1,
const RuntimeShape& input2_shape,
const TfLiteTensor* input2,
const RuntimeShape& output_shape,
bool* output_data) {
const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const auto lhs = GetString(input1, i);
const auto rhs = GetString(input2, i);
output_data[i] = F(lhs, rhs);
}
}

template <ComparisonFn<float> F>
inline void Comparison(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
@@ -105,30 +76,30 @@ inline void Comparison(const ComparisonParams& op_params,
input2_data, output_shape, output_data);
}

template <typename T, ComparisonFn<int32> F>
template <typename T, ComparisonFn<int32_t> F>
inline void ComparisonWithScaling(
const ComparisonParams& op_params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, bool* output_data) {
int left_shift = op_params.left_shift;
int32 input1_offset = op_params.input1_offset;
int32 input1_multiplier = op_params.input1_multiplier;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32 input2_offset = op_params.input2_offset;
int32 input2_multiplier = op_params.input2_multiplier;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;

const int64_t flatsize =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
for (int64_t i = 0; i < flatsize; ++i) {
const int32 input1_val = input1_offset + input1_data[i];
const int32 input2_val = input2_offset + input2_data[i];
const int32 shifted_input1_val = input1_val * (1 << left_shift);
const int32 shifted_input2_val = input2_val * (1 << left_shift);
const int32 scaled_input1_val =
const int32_t input1_val = input1_offset + input1_data[i];
const int32_t input2_val = input2_offset + input2_data[i];
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[i] = F(scaled_input1_val, scaled_input2_val);
@@ -180,31 +151,6 @@ inline void BroadcastComparison4DSlowImpl(
}
}

template <bool (*F)(const StringRef&, const StringRef&)>
inline void BroadcastComparison4DSlowStringImpl(
const RuntimeShape& unextended_input1_shape, const TfLiteTensor* input1,
const RuntimeShape& unextended_input2_shape, const TfLiteTensor* input2,
const RuntimeShape& unextended_output_shape, bool* output_data) {
const BroadcastComparison4DSlowCommon dims =
BroadcastComparison4DSlowPreprocess(unextended_input1_shape,
unextended_input2_shape,
unextended_output_shape);

for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const auto lhs =
GetString(input1, SubscriptToIndex(dims.desc1, b, y, x, c));
const auto rhs =
GetString(input2, SubscriptToIndex(dims.desc2, b, y, x, c));
output_data[Offset(dims.output_shape, b, y, x, c)] = F(lhs, rhs);
}
}
}
}
}

template <ComparisonFn<float> F>
inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
const RuntimeShape& input1_shape,
@@ -218,7 +164,7 @@ inline void BroadcastComparison4DSlow(const ComparisonParams& op_params,
output_shape, output_data);
}

template <typename T, ComparisonFn<int32> F>
template <typename T, ComparisonFn<int32_t> F>
inline void BroadcastComparison4DSlowWithScaling(
const ComparisonParams& op_params,
const RuntimeShape& unextended_input1_shape, const T* input1_data,
@@ -230,29 +176,29 @@ inline void BroadcastComparison4DSlowWithScaling(
unextended_output_shape);

int left_shift = op_params.left_shift;
int32 input1_offset = op_params.input1_offset;
int32 input1_multiplier = op_params.input1_multiplier;
int32_t input1_offset = op_params.input1_offset;
int32_t input1_multiplier = op_params.input1_multiplier;
int input1_shift = op_params.input1_shift;
int32 input2_offset = op_params.input2_offset;
int32 input2_multiplier = op_params.input2_multiplier;
int32_t input2_offset = op_params.input2_offset;
int32_t input2_multiplier = op_params.input2_multiplier;
int input2_shift = op_params.input2_shift;

for (int b = 0; b < dims.output_shape.Dims(0); ++b) {
for (int y = 0; y < dims.output_shape.Dims(1); ++y) {
for (int x = 0; x < dims.output_shape.Dims(2); ++x) {
for (int c = 0; c < dims.output_shape.Dims(3); ++c) {
const int32 input1_val =
const int32_t input1_val =
input1_offset +
input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)];
const int32 input2_val =
const int32_t input2_val =
input2_offset +
input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)];
const int32 shifted_input1_val = input1_val * (1 << left_shift);
const int32 shifted_input2_val = input2_val * (1 << left_shift);
const int32 scaled_input1_val =
const int32_t shifted_input1_val = input1_val * (1 << left_shift);
const int32_t shifted_input2_val = input2_val * (1 << left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, input1_multiplier, input1_shift);
const int32 scaled_input2_val =
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, input2_multiplier, input2_shift);
output_data[Offset(dims.output_shape, b, y, x, c)] =

@@ -74,14 +74,14 @@ inline void Concatenation(const ConcatenationParams& params,
// when optimizng this routine further.
inline void ConcatenationWithScaling(const ConcatenationParams& params,
const RuntimeShape* const* input_shapes,
const uint8* const* input_data,
const uint8_t* const* input_data,
const RuntimeShape& output_shape,
uint8* output_data) {
uint8_t* output_data) {
int axis = params.axis;
const int32* input_zeropoint = params.input_zeropoint;
const int32_t* input_zeropoint = params.input_zeropoint;
const float* input_scale = params.input_scale;
int inputs_count = params.inputs_count;
const int32 output_zeropoint = params.output_zeropoint;
const int32_t output_zeropoint = params.output_zeropoint;
const float output_scale = params.output_scale;

const int concat_dimensions = output_shape.DimensionsCount();
@@ -110,11 +110,11 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
}

const float inverse_output_scale = 1.f / output_scale;
uint8* output_ptr = output_data;
uint8_t* output_ptr = output_data;
for (int k = 0; k < outer_size; k++) {
for (int i = 0; i < inputs_count; ++i) {
const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size;
const uint8* input_ptr = input_data[i] + k * copy_size;
const uint8_t* input_ptr = input_data[i] + k * copy_size;
if (input_zeropoint[i] == output_zeropoint &&
input_scale[i] == output_scale) {
memcpy(output_ptr, input_ptr, copy_size);

@@ -59,28 +59,31 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
const int in_y_origin = (out_y * stride_height) - pad_height;
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
float total = 0.f;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;

// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);

if (!is_point_inside_image) {
continue;
}

for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
float input_value = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
float filter_value =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
total += (input_value * filter_value);
}
float input_value = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
float filter_value = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
total += (input_value * filter_value);
}
}
}
@@ -99,11 +102,11 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
}

inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& filter_shape,
const uint8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
uint8* output_data, const RuntimeShape& im2col_shape,
uint8* im2col_data, void* cpu_backend_context) {
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data, const RuntimeShape& im2col_shape,
uint8_t* im2col_data, void* cpu_backend_context) {
(void)cpu_backend_context; // only used in optimized code.
(void)im2col_data; // only used in optimized code.
(void)im2col_shape; // only used in optimized code.
@@ -113,13 +116,13 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);

TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@@ -139,29 +142,32 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch) {
for (int out_y = 0; out_y < output_height; ++out_y) {
const int in_y_origin = (out_y * stride_height) - pad_height;
for (int out_x = 0; out_x < output_width; ++out_x) {
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
const int in_y = in_y_origin + dilation_height_factor * filter_y;
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;

// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);

if (!is_point_inside_image) {
continue;
}

for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
const int in_x = in_x_origin + dilation_width_factor * filter_x;
const int in_y =
in_y_origin + dilation_height_factor * filter_y;
// If the location is outside the bounds of the input image,
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
int32_t input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc +=
(filter_val + filter_offset) * (input_val + input_offset);
}
int32_t filter_val = filter_data[Offset(
filter_shape, out_channel, filter_y, filter_x, in_channel)];
acc +=
(filter_val + filter_offset) * (input_val + input_offset);
}
}
}
@@ -174,7 +180,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<uint8>(acc);
static_cast<uint8_t>(acc);
}
}
}
@@ -220,7 +226,7 @@ inline void HybridConvPerChannel(
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
@@ -231,9 +237,9 @@ inline void HybridConvPerChannel(
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val = input_data[Offset(input_shape, batch, in_y,
in_x, in_channel)];
int32 filter_val =
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y,
filter_x, in_channel)];
acc += filter_val * (input_val - input_offset[batch]);
@@ -258,5 +264,4 @@ inline void HybridConvPerChannel(
} // namespace reference_ops
} // namespace tflite


#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_

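Both Conv variants above walk the output in batch, row, column, channel order and read operands through Offset(shape, b, y, x, c). For a dense NHWC tensor that is plain row-major index arithmetic, restated below as a standalone sketch; OffsetNHWC is an illustrative stand-in for the library helper, assuming dims holds {N, H, W, C}.

// Flat index of element (b, y, x, c) in a dense NHWC tensor (hypothetical
// helper mirroring the 4-D Offset() used by the reference kernels).
inline int OffsetNHWC(const int dims[4], int b, int y, int x, int c) {
  // ((b * H + y) * W + x) * C + c
  return ((b * dims[1] + y) * dims[2] + x) * dims[3] + c;
}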
@@ -62,21 +62,21 @@ namespace reference_ops {
namespace depthwise_conv {

template <DepthwiseConvOutputRounding output_rounding>
inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier,
int shift) {
inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
int shift) {
TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone);
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
int32 x, int32 quantized_multiplier, int shift) {
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
int32_t x, int32_t quantized_multiplier, int shift) {
return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
}

template <>
inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
int32 x, int32 quantized_multiplier, int shift) {
inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
int32_t x, int32_t quantized_multiplier, int shift) {
using gemmlowp::SaturatingRoundingDoublingHighMul;
const int left_shift = shift > 0 ? shift : 0;
const int right_shift = shift > 0 ? 0 : -shift;
@@ -89,13 +89,12 @@ inline int32 DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(

template <DepthwiseConvOutputRounding output_rounding>
struct DepthwiseConvBasicKernel {
static inline void Run(const DepthwiseParams& params,
const RuntimeShape& input_shape,
const uint8* input_data,
const RuntimeShape& filter_shape,
const uint8* filter_data,
const RuntimeShape& bias_shape, const int32* bias_data,
const RuntimeShape& output_shape, uint8* output_data) {
static inline void Run(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
@@ -103,12 +102,12 @@ struct DepthwiseConvBasicKernel {
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32 input_offset = params.input_offset;
const int32 filter_offset = params.weights_offset;
const int32 output_offset = params.output_offset;
const int32 output_multiplier = params.output_multiplier;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
@@ -135,7 +134,7 @@ struct DepthwiseConvBasicKernel {
const int oc = m + ic * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x =
@@ -146,9 +145,9 @@ struct DepthwiseConvBasicKernel {
// use zero as a default value.
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height)) {
int32 input_val =
int32_t input_val =
input_data[Offset(input_shape, b, in_y, in_x, ic)];
int32 filter_val = filter_data[Offset(
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, oc)];
acc += (filter_val + filter_offset) *
(input_val + input_offset);
@@ -164,7 +163,7 @@ struct DepthwiseConvBasicKernel {
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, b, out_y, out_x, oc)] =
static_cast<uint8>(acc);
static_cast<uint8_t>(acc);
}
}
}
@@ -176,10 +175,10 @@ struct DepthwiseConvBasicKernel {
// MultiplyByQuantizedMultiplier or DepthwiseConvRound function.
static inline void RunPerChannel(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const int8* input_data, const RuntimeShape& filter_shape,
const int8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
int8* output_data) {
const int8_t* input_data, const RuntimeShape& filter_shape,
const int8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
int8_t* output_data) {
// Get parameters.
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
const int stride_width = params.stride_width;
@@ -189,12 +188,12 @@ struct DepthwiseConvBasicKernel {
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int depth_multiplier = params.depth_multiplier;
const int32 input_offset = params.input_offset;
const int32 output_offset = params.output_offset;
const int32 output_activation_min = params.quantized_activation_min;
const int32 output_activation_max = params.quantized_activation_max;
const int32* output_multiplier = params.output_multiplier_per_channel;
const int32* output_shift = params.output_shift_per_channel;
const int32_t input_offset = params.input_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
const int32_t* output_multiplier = params.output_multiplier_per_channel;
const int32_t* output_shift = params.output_shift_per_channel;

// Check dimensions of the tensors.
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
@@ -222,7 +221,7 @@ struct DepthwiseConvBasicKernel {
const int output_channel = m + in_channel * depth_multiplier;
const int in_x_origin = (out_x * stride_width) - pad_width;
const int in_y_origin = (out_y * stride_height) - pad_height;
int32 acc = 0;
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
const int in_x =
@@ -234,17 +233,18 @@ struct DepthwiseConvBasicKernel {
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
(in_y < input_height);
if (is_point_inside_image) {
int32 input_val = input_data[Offset(
int32_t input_val = input_data[Offset(
input_shape, batch, in_y, in_x, in_channel)];
int32 filter_val = filter_data[Offset(
int32_t filter_val = filter_data[Offset(
filter_shape, 0, filter_y, filter_x, output_channel)];
// Accumulate with 32 bits accumulator.
// In the nudging process during model quantization, we
// force real value of 0.0 be represented by a quantized
// value. This guarantees that the input_offset is a int8,
// even though it is represented using int32. int32 += int8
// * (int8 - int8) so the highest value we can get from each
// accumulation is [-127, 127] * ([-128, 127] -
// value. This guarantees that the input_offset is a int8_t,
// even though it is represented using int32_t. int32_t +=
// int8_t
// * (int8_t - int8_t) so the highest value we can get from
// each accumulation is [-127, 127] * ([-128, 127] -
// [-128, 127]), which is [-32512, 32512]. log2(32512)
// = 14.98, which means we can accumulate at least 2^16
// multiplications without overflow. The accumulator is
@@ -279,10 +279,10 @@ struct DepthwiseConvBasicKernel {

inline void DepthwiseConv(
const DepthwiseParams& params, const RuntimeShape& input_shape,
const uint8* input_data, const RuntimeShape& filter_shape,
const uint8* filter_data, const RuntimeShape& bias_shape,
const int32* bias_data, const RuntimeShape& output_shape,
uint8* output_data) {
const uint8_t* input_data, const RuntimeShape& filter_shape,
const uint8_t* filter_data, const RuntimeShape& bias_shape,
const int32_t* bias_data, const RuntimeShape& output_shape,
uint8_t* output_data) {
return depthwise_conv::DepthwiseConvBasicKernel<
DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape,
input_data, filter_shape,

@@ -32,12 +32,12 @@ inline void Dequantize(const tflite::DequantizationParams& op_params,
const RuntimeShape& input_shape,
const InputT* input_data,
const RuntimeShape& output_shape, OutputT* output_data) {
int32 zero_point = op_params.zero_point;
int32_t zero_point = op_params.zero_point;
const double scale = op_params.scale;
const int flat_size = MatchingFlatSize(input_shape, output_shape);

for (int i = 0; i < flat_size; i++) {
const int32 val = input_data[i];
const int32_t val = input_data[i];
const OutputT result = static_cast<OutputT>(scale * (val - zero_point));
output_data[i] = result;
}
@@ -52,11 +52,11 @@ inline void PerChannelDequantize(
// Ensure flat size is same.
MatchingFlatSize(input_shape, output_shape);

const int32* zero_point = op_params.zero_point;
const int32_t* zero_point = op_params.zero_point;
const float* scale = op_params.scale;
const int32 quantized_dimension = op_params.quantized_dimension;
const int32 num_dims = input_shape.DimensionsCount();
const int32* dims_data = input_shape.DimsData();
const int32_t quantized_dimension = op_params.quantized_dimension;
const int32_t num_dims = input_shape.DimensionsCount();
const int32_t* dims_data = input_shape.DimsData();
std::vector<int> current_dim(num_dims, 0);

do {
@@ -64,7 +64,7 @@ inline void PerChannelDequantize(
ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
current_dim.data(), 0, nullptr);
const int channel = current_dim[quantized_dimension];
const int32 val = input_data[offset];
const int32_t val = input_data[offset];
const float result =
static_cast<float>(scale[channel] * (val - zero_point[channel]));
output_data[offset] = result;

@@ -61,17 +61,17 @@ inline void FullyConnected(
|
||||
|
||||
inline void FullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
||||
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
|
||||
|
||||
@@ -89,10 +89,10 @@ inline void FullyConnected(
|
||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int32 input_val = input_data[b * accum_depth + d];
|
||||
int32 filter_val = filter_data[out_c * accum_depth + d];
|
||||
int32_t input_val = input_data[b * accum_depth + d];
|
||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
||||
acc += (filter_val + filter_offset) * (input_val + input_offset);
|
||||
}
|
||||
if (bias_data) {
|
||||
@@ -102,24 +102,24 @@ inline void FullyConnected(
|
||||
acc += output_offset;
|
||||
acc = std::max(acc, output_activation_min);
|
||||
acc = std::min(acc, output_activation_max);
|
||||
output_data[out_c + output_depth * b] = static_cast<uint8>(acc);
|
||||
output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
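A small self-contained check of the accumulation step above: with the offsets taken as the negated zero points (as the comment in the integer add kernel later in this commit puts it, "input offset is negative input zero point"), (filter_val + filter_offset) * (input_val + input_offset) is exactly the product of the de-offset quantized values. The concrete numbers are assumptions for illustration:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical quantized values and zero points (uint8-style tensors).
  const int32_t input_val = 200, input_zero_point = 128;
  const int32_t filter_val = 90, filter_zero_point = 100;
  const int32_t input_offset = -input_zero_point;
  const int32_t filter_offset = -filter_zero_point;
  // Accumulation term as written in the kernel ...
  const int32_t kernel_term =
      (filter_val + filter_offset) * (input_val + input_offset);
  // ... equals the product of the de-offset quantized values.
  const int32_t direct_term =
      (filter_val - filter_zero_point) * (input_val - input_zero_point);
  assert(kernel_term == direct_term);  // (90 - 100) * (200 - 128) = -720
  return 0;
}
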
|
||||
|
||||
inline void FullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const uint8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const uint8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
TFLITE_DCHECK_EQ(output_offset, 0);
|
||||
@@ -138,20 +138,21 @@ inline void FullyConnected(
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
// Internal accumulation.
|
||||
// Initialize accumulator with the bias-value.
|
||||
int32 accum = bias_data[out_c];
|
||||
int32_t accum = bias_data[out_c];
|
||||
// Accumulation loop.
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int16 input_val = input_data[b * accum_depth + d] + input_offset;
|
||||
int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset;
|
||||
int16_t input_val = input_data[b * accum_depth + d] + input_offset;
|
||||
int16_t filter_val =
|
||||
filter_data[out_c * accum_depth + d] + filter_offset;
|
||||
accum += filter_val * input_val;
|
||||
}
|
||||
// Down-scale the final int32 accumulator to the scale used by our
|
||||
// Down-scale the final int32_t accumulator to the scale used by our
|
||||
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
|
||||
// multiplier and shift here have been pre-computed offline
|
||||
// (e.g. by toco).
|
||||
accum =
|
||||
MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift);
|
||||
// Saturate, cast to int16, and store to output array.
|
||||
// Saturate, cast to int16_t, and store to output array.
|
||||
accum = std::max(accum, output_activation_min - output_offset);
|
||||
accum = std::min(accum, output_activation_max - output_offset);
|
||||
accum += output_offset;
|
||||
@@ -162,14 +163,14 @@ inline void FullyConnected(
|
||||
|
||||
inline void ShuffledFullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& weights_shape,
|
||||
const uint8* shuffled_weights_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
int16* output_data, uint8* shuffled_input_workspace_data) {
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const uint8_t* input_data, const RuntimeShape& weights_shape,
|
||||
const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data, uint8_t* shuffled_input_workspace_data) {
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
|
||||
TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1);
|
||||
@@ -190,7 +191,7 @@ inline void ShuffledFullyConnected(
|
||||
TFLITE_DCHECK((output_depth % 4) == 0);
|
||||
|
||||
// Shuffling and xoring of input activations into the workspace buffer
|
||||
uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
|
||||
uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data;
|
||||
if (batches == 1) {
|
||||
for (int i = 0; i < accum_depth; i++) {
|
||||
shuffled_input_workspace_data[i] = input_data[i] ^ 0x80;
|
||||
@@ -198,13 +199,13 @@ inline void ShuffledFullyConnected(
|
||||
} else if (batches == 4) {
|
||||
for (int c = 0; c < accum_depth; c += 16) {
|
||||
for (int b = 0; b < 4; b++) {
|
||||
const uint8* src_data_ptr = input_data + b * accum_depth + c;
|
||||
const uint8_t* src_data_ptr = input_data + b * accum_depth + c;
|
||||
for (int j = 0; j < 16; j++) {
|
||||
uint8 src_val = *src_data_ptr++;
|
||||
uint8_t src_val = *src_data_ptr++;
|
||||
// Flip the sign bit, so that the kernel will only need to
|
||||
// reinterpret these uint8 values as int8, getting for free the
|
||||
// reinterpret these uint8_t values as int8_t, getting for free the
|
||||
// subtraction of the zero_point value 128.
|
||||
uint8 dst_val = src_val ^ 0x80;
|
||||
uint8_t dst_val = src_val ^ 0x80;
|
||||
*shuffled_input_workspace_ptr++ = dst_val;
|
||||
}
|
||||
}
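A tiny standalone check of the sign-bit trick described in the comments above: XOR-ing a uint8_t with 0x80 and reinterpreting it as int8_t behaves like subtracting the zero point 128 (assuming two's-complement int8_t, which holds on the targets this code runs on):

#include <cassert>
#include <cstdint>

int main() {
  for (int v = 0; v <= 255; ++v) {
    const uint8_t src_val = static_cast<uint8_t>(v);
    const uint8_t dst_val = src_val ^ 0x80;            // flip the sign bit
    const int8_t reinterpreted = static_cast<int8_t>(dst_val);
    assert(reinterpreted == v - 128);                  // free zero-point subtraction
  }
  return 0;
}
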
|
||||
@@ -216,62 +217,62 @@ inline void ShuffledFullyConnected(
|
||||
|
||||
// Actual computation
|
||||
if (batches == 1) {
|
||||
int16* output_ptr = output_data;
|
||||
int16_t* output_ptr = output_data;
|
||||
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
|
||||
// so that just reinterpreting them as int8 values is equivalent to
|
||||
// so that just reinterpreting them as int8_t values is equivalent to
|
||||
// subtracting 128 from them, thus implementing for free the subtraction of
|
||||
// the zero_point value 128.
|
||||
const int8* shuffled_weights_ptr =
|
||||
reinterpret_cast<const int8*>(shuffled_weights_data);
|
||||
const int8_t* shuffled_weights_ptr =
|
||||
reinterpret_cast<const int8_t*>(shuffled_weights_data);
|
||||
// Likewise, we preshuffled and pre-xored the input data above.
|
||||
const int8* shuffled_input_data =
|
||||
reinterpret_cast<const int8*>(shuffled_input_workspace_data);
|
||||
const int8_t* shuffled_input_data =
|
||||
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
|
||||
for (int c = 0; c < output_depth; c += 4) {
|
||||
// Internal accumulation.
|
||||
// Initialize accumulator with the bias-value.
|
||||
int32 accum[4] = {0};
|
||||
int32_t accum[4] = {0};
|
||||
// Accumulation loop.
|
||||
for (int d = 0; d < accum_depth; d += 16) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 16; j++) {
|
||||
int8 input_val = shuffled_input_data[d + j];
|
||||
int8 weights_val = *shuffled_weights_ptr++;
|
||||
int8_t input_val = shuffled_input_data[d + j];
|
||||
int8_t weights_val = *shuffled_weights_ptr++;
|
||||
accum[i] += weights_val * input_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 4; i++) {
|
||||
// Add bias value
|
||||
int32 acc = accum[i] + bias_data[c + i];
|
||||
// Down-scale the final int32 accumulator to the scale used by our
|
||||
int32_t acc = accum[i] + bias_data[c + i];
|
||||
// Down-scale the final int32_t accumulator to the scale used by our
|
||||
// (16-bit, typically 3 integer bits) fixed-point format. The quantized
|
||||
// multiplier and shift here have been pre-computed offline
|
||||
// (e.g. by toco).
|
||||
acc =
|
||||
MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
|
||||
// Saturate, cast to int16, and store to output array.
|
||||
// Saturate, cast to int16_t, and store to output array.
|
||||
acc = std::max(acc, output_activation_min);
|
||||
acc = std::min(acc, output_activation_max);
|
||||
output_ptr[c + i] = acc;
|
||||
}
|
||||
}
|
||||
} else if (batches == 4) {
|
||||
int16* output_ptr = output_data;
|
||||
int16_t* output_ptr = output_data;
|
||||
// Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd)
|
||||
// so that just reinterpreting them as int8 values is equivalent to
|
||||
// so that just reinterpreting them as int8_t values is equivalent to
|
||||
// subtracting 128 from them, thus implementing for free the subtraction of
|
||||
// the zero_point value 128.
|
||||
const int8* shuffled_weights_ptr =
|
||||
reinterpret_cast<const int8*>(shuffled_weights_data);
|
||||
const int8_t* shuffled_weights_ptr =
|
||||
reinterpret_cast<const int8_t*>(shuffled_weights_data);
|
||||
// Likewise, we preshuffled and pre-xored the input data above.
|
||||
const int8* shuffled_input_data =
|
||||
reinterpret_cast<const int8*>(shuffled_input_workspace_data);
|
||||
const int8_t* shuffled_input_data =
|
||||
reinterpret_cast<const int8_t*>(shuffled_input_workspace_data);
|
||||
for (int c = 0; c < output_depth; c += 4) {
|
||||
const int8* shuffled_input_ptr = shuffled_input_data;
|
||||
const int8_t* shuffled_input_ptr = shuffled_input_data;
|
||||
// Accumulation loop.
|
||||
// Internal accumulation.
|
||||
// Initialize accumulator with the bias-value.
|
||||
int32 accum[4][4];
|
||||
int32_t accum[4][4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int b = 0; b < 4; b++) {
|
||||
accum[i][b] = 0;
|
||||
@@ -281,8 +282,8 @@ inline void ShuffledFullyConnected(
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int b = 0; b < 4; b++) {
|
||||
for (int j = 0; j < 16; j++) {
|
||||
int8 input_val = shuffled_input_ptr[16 * b + j];
|
||||
int8 weights_val = shuffled_weights_ptr[16 * i + j];
|
||||
int8_t input_val = shuffled_input_ptr[16 * b + j];
|
||||
int8_t weights_val = shuffled_weights_ptr[16 * i + j];
|
||||
accum[i][b] += weights_val * input_val;
|
||||
}
|
||||
}
|
||||
@@ -293,14 +294,14 @@ inline void ShuffledFullyConnected(
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int b = 0; b < 4; b++) {
|
||||
// Add bias value
|
||||
int32 acc = accum[i][b] + bias_data[c + i];
|
||||
// Down-scale the final int32 accumulator to the scale used by our
|
||||
int32_t acc = accum[i][b] + bias_data[c + i];
|
||||
// Down-scale the final int32_t accumulator to the scale used by our
|
||||
// (16-bit, typically 3 integer bits) fixed-point format. The
|
||||
// quantized multiplier and shift here have been pre-computed offline
|
||||
// (e.g. by toco).
|
||||
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier,
|
||||
output_shift);
|
||||
// Saturate, cast to int16, and store to output array.
|
||||
// Saturate, cast to int16_t, and store to output array.
|
||||
acc = std::max(acc, output_activation_min);
|
||||
acc = std::min(acc, output_activation_max);
|
||||
output_ptr[b * output_depth + c + i] = acc;
|
||||
|
||||
@@ -0,0 +1,166 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_

#include "ruy/profiler/instrumentation.h" // from @ruy
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace reference_ops {

inline int16_t SaturatingLeftShift(int16_t value, int amount) {
int32_t result = static_cast<int32_t>(value) * (1 << amount);
result = std::min<int32_t>(result, std::numeric_limits<int16_t>::max());
result = std::max<int32_t>(result, std::numeric_limits<int16_t>::min());
return result;
}

// Similar to ARM instruction SQDMULH.
// Similar to gemmlowp::SaturatingRoundingDoublingHighMul except
// rounding to zero instead of to nearest (SQRDMULH).
inline std::int16_t SaturatingDoublingHighMul(std::int16_t a, std::int16_t b) {
bool overflow = a == b && a == std::numeric_limits<std::int16_t>::min();
std::int32_t a_32(a);
std::int32_t b_32(b);
std::int32_t ab_32 = a_32 * b_32;
std::int16_t ab_x2_high16 = static_cast<std::int16_t>((ab_32) / (1 << 15));
return overflow ? std::numeric_limits<std::int16_t>::max() : ab_x2_high16;
}

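A standalone numeric check of the rounding-to-zero behaviour noted above; the helper body is copied from the function above so the sketch compiles on its own, and the expected values are worked out by hand:

#include <cassert>
#include <cstdint>
#include <limits>

// Local copy of SaturatingDoublingHighMul, for a self-contained check.
inline int16_t SaturatingDoublingHighMulCopy(int16_t a, int16_t b) {
  const bool overflow = a == b && a == std::numeric_limits<int16_t>::min();
  const int32_t ab_32 = static_cast<int32_t>(a) * static_cast<int32_t>(b);
  const int16_t ab_x2_high16 = static_cast<int16_t>(ab_32 / (1 << 15));
  return overflow ? std::numeric_limits<int16_t>::max() : ab_x2_high16;
}

int main() {
  // 16384 * 3 = 49152; 49152 / 32768 = 1.5, truncated toward zero -> 1.
  // A round-to-nearest variant (SQRDMULH-style) would give 2 here.
  assert(SaturatingDoublingHighMulCopy(16384, 3) == 1);
  // Overflow guard: min * min saturates to int16 max instead of wrapping.
  assert(SaturatingDoublingHighMulCopy(-32768, -32768) ==
         std::numeric_limits<int16_t>::max());
  return 0;
}
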
template <typename T>
inline void HardSwish(const RuntimeShape& input_shape, const T* input_data,
const RuntimeShape& output_shape, T* output_data) {
ruy::profiler::ScopeLabel label("ReferenceHardSwish/Float");
auto matching_size = MatchingFlatSize(input_shape, output_shape);
const T* in_end = input_data + matching_size;
for (; input_data < in_end; input_data++, output_data++) {
const float in = *input_data;
*output_data =
in * std::min(static_cast<T>(6), std::max(static_cast<T>(0), in + 3)) /
6;
}
}

template <typename T>
|
||||
inline void HardSwish(const HardSwishParams& params,
|
||||
const RuntimeShape& input_shape, const T* input_data,
|
||||
const RuntimeShape& output_shape, T* output_data) {
|
||||
ruy::profiler::ScopeLabel label("ReferenceHardSwish/Quantized");
|
||||
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
const int16_t input_value = input_data[i] - params.input_zero_point;
|
||||
// Left-shift as much as we can without overflow/saturation to put
|
||||
// significant bits in the high bits of our 16-bit fixedpoint values, so
|
||||
// that fixed-point approximate computations below are as accurate as
|
||||
// possible.
|
||||
const int16_t input_value_on_hires_input_scale = input_value * (1 << 7);
|
||||
// Compute the input value on essentially the output scale, just not
|
||||
// right-shifted yet. This is the value that we'll use in the (x >= +3)
|
||||
// case, and that in the general case we'll multiply against the "relu-ish"
|
||||
// fixed-point multiplier in [0, 1].
|
||||
const int16_t input_value_on_preshift_output_scale =
|
||||
gemmlowp::SaturatingRoundingDoublingHighMul(
|
||||
input_value_on_hires_input_scale,
|
||||
params.output_multiplier_fixedpoint_int16);
|
||||
// Now compute the "relu-ish multiplier". In the (-3 <= x <= +3) case, that
|
||||
// is just an affine rescaling of x from [-3, 3] to [0, 1]. In the general
|
||||
// case, it is just that plus saturation at the boundaries of [-3, 3].
|
||||
// First, we rescale from [-3, 3] to [-1, 1], saturating.
|
||||
// That is done by rescaling the input value with a fixed-point multiplier
|
||||
// (reluish_multiplier_fixedpoint) and bit-shift such that we represent
|
||||
// that input value on the scale where the real value 3.0f is represented
|
||||
// by the quantized value 32768. (+32768 is actually not representable as
|
||||
// int16_t, so this saturates at +32767, and that is seen empirically to be
|
||||
// a negligible contribution to numerical error/bias).
|
||||
//
|
||||
// This code is careful to correctly implement any magnitude of multiplier,
|
||||
// involving either a right shift or a left shift, with correct saturation
|
||||
// behavior in the left-shift case. This forces this code to be more
|
||||
// complicated, but is necessary for real applications: a partially
|
||||
// trained quantized MobileNet v3-small model that motivated this code
|
||||
// exhibits some large [min, max] range boundaries, of the order of
|
||||
// magnitude of 10 or 100 depending on layers.
|
||||
//
|
||||
// The next few lines are basically just an ordinary
|
||||
// MultiplyByQuantizedMultiplier, except that we are more careful here
|
||||
// about the fine details of saturation when left-shifting, because here
|
||||
// overflow in left-shift is a common case, not an anomaly as
|
||||
// MultiplyByQuantizedMultiplier assumes.
|
||||
int16_t reluish_value = input_value_on_hires_input_scale;
|
||||
// Shift left, saturating, as much as we can while ensuring that this
|
||||
// saturation will not contribute to the result. That is, left shift amount
|
||||
// reduced by 1.
|
||||
if (params.reluish_multiplier_exponent > 0) {
|
||||
reluish_value = SaturatingLeftShift(
|
||||
reluish_value, params.reluish_multiplier_exponent - 1);
|
||||
}
|
||||
// Apply the fixed-point multiplier, dividing the value by a divisor
|
||||
// ranging in [1, 2].
|
||||
reluish_value = gemmlowp::SaturatingRoundingDoublingHighMul(
|
||||
reluish_value, params.reluish_multiplier_fixedpoint_int16);
|
||||
// Apply the last bit of left-shift. Thus, in the left-shifting case, if
|
||||
// any saturation affects the result, it is happening here --- any
|
||||
// saturation having occurred above is overwritten here, not affecting the
|
||||
// result.
|
||||
if (params.reluish_multiplier_exponent > 0) {
|
||||
reluish_value = SaturatingLeftShift(reluish_value, 1);
|
||||
}
|
||||
// Shift right, in the right-shifting case.
|
||||
if (params.reluish_multiplier_exponent < 0) {
|
||||
reluish_value = gemmlowp::RoundingDivideByPOT(
|
||||
reluish_value, -params.reluish_multiplier_exponent);
|
||||
}
|
||||
// At this point we have rescaled the value into a 16bit fixedpoint
|
||||
// reluish_value in [-1, 1].
|
||||
// We now convert that to a 16bit fixedpoint value in [0, 1].
|
||||
reluish_value = (reluish_value + (1 << 15)) >> 1;
|
||||
// Use of SaturatingDoublingHighMul here is important to cancel the biases
// from the above SaturatingRoundingDoublingHighMul.
//
// On a partially trained MobileNet-v3-small,
//
//                                       |   bias on  |  ImageNet
//                                       | quantized  |  Top-1
// Operation used here                   |   values   |  accuracy (50k)
// --------------------------------------+------------+-----------
// SaturatingDoublingHighMul             |  -0.0024   |  58.920
// SaturatingRoundingDoublingHighMul     |  -0.0067   |  58.064
//
// In activations_test, this is covered by this testcase:
// QuantizedActivationsOpTest.HardSwishBias
//
const int16_t preshift_output_value = SaturatingDoublingHighMul(
reluish_value, input_value_on_preshift_output_scale);
// We were so far operating on the pre-shift output scale. Now we finally
// apply that output shift, arriving at the final output scale.
int16_t output_value = gemmlowp::RoundingDivideByPOT(
preshift_output_value, -params.output_multiplier_exponent);
output_value += params.output_zero_point;
output_value =
std::min<int16_t>(output_value, std::numeric_limits<T>::max());
output_value =
std::max<int16_t>(output_value, std::numeric_limits<T>::min());
output_data[i] = output_value;
}
}

} // namespace reference_ops
} // namespace tflite

#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_

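For reference, the float identity that the fixed-point code above approximates is hard_swish(x) = x * min(6, max(0, x + 3)) / 6, i.e. x times a "relu-ish" factor that maps [-3, 3] affinely onto [0, 1] and saturates outside it, which is exactly the factorization the quantized kernel mirrors. A minimal float sketch of that equivalence:

#include <algorithm>
#include <cassert>
#include <cmath>

// Float reference, same expression as the HardSwish float kernel above.
float HardSwishFloat(float x) {
  return x * std::min(6.0f, std::max(0.0f, x + 3.0f)) / 6.0f;
}

// Equivalent "x * reluish(x)" factorization mirrored by the quantized code.
float HardSwishFactored(float x) {
  const float reluish = std::min(1.0f, std::max(0.0f, (x + 3.0f) / 6.0f));
  return x * reluish;
}

int main() {
  for (float x = -8.0f; x <= 8.0f; x += 0.25f) {
    assert(std::fabs(HardSwishFloat(x) - HardSwishFactored(x)) < 1e-6f);
  }
  return 0;
}
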
@@ -23,34 +23,41 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
inline void CheckArithmeticParams(const ArithmeticParams& params) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
// Input offset is negative input zero point. Activation tensors are
|
||||
// asymmetric quantized so they span the full int8 range.
|
||||
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
|
||||
TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits<int8_t>::min());
|
||||
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
|
||||
TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits<int8_t>::max());
|
||||
}
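Spelled out as a standalone sketch, the checks above simply require that the (negated) offsets correspond to zero points that are representable int8_t values; the zero point below is an arbitrary example:

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  // Hypothetical int8 zero point; the kernel stores its negation as the offset.
  const int32_t zero_point = -5;
  const int32_t input_offset = -zero_point;
  // The DCHECKs above, spelled out: -offset (the zero point itself) must be a
  // valid int8 value.
  assert(-input_offset >= std::numeric_limits<int8_t>::min());
  assert(-input_offset <= std::numeric_limits<int8_t>::max());
  return 0;
}
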
|
||||
|
||||
// Element-wise add that can often be used for inner loop of broadcast add as
|
||||
// well as the non-broadcast add.
|
||||
inline void AddElementwise(int size, const ArithmeticParams& params,
|
||||
const int8_t* input1_data, const int8_t* input2_data,
|
||||
int8_t* output_data) {
|
||||
const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
|
||||
TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
|
||||
CheckArithmeticParams(params);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sum = scaled_input1_val + scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sum, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<int8_t>(clamped_output);
|
||||
@@ -61,16 +68,11 @@ inline void Add(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const int8_t* input2_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
CheckArithmeticParams(params);
|
||||
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
|
||||
const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
|
||||
TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
|
||||
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
|
||||
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,27 +22,27 @@ namespace reference_integer_ops {
|
||||
|
||||
// Fixed-point per-channel-quantization convolution reference kernel.
|
||||
inline void ConvPerChannel(
|
||||
const ConvParams& params, const int32* output_multiplier,
|
||||
const int32* output_shift, const RuntimeShape& input_shape,
|
||||
const int8* input_data, const RuntimeShape& filter_shape,
|
||||
const int8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
int8* output_data) {
|
||||
const ConvParams& params, const int32_t* output_multiplier,
|
||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
// Get parameters.
|
||||
const int32 input_offset = params.input_offset; // r = s(q - Z)
|
||||
const int32_t input_offset = params.input_offset; // r = s(q - Z)
|
||||
const int stride_width = params.stride_width;
|
||||
const int stride_height = params.stride_height;
|
||||
const int dilation_width_factor = params.dilation_width_factor;
|
||||
const int dilation_height_factor = params.dilation_height_factor;
|
||||
const int pad_width = params.padding_values.width;
|
||||
const int pad_height = params.padding_values.height;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
|
||||
// Set min and max value of the output.
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
// Sanity check.
|
||||
// Consistency check.
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
||||
@@ -63,45 +63,47 @@ inline void ConvPerChannel(
|
||||
const int output_width = output_shape.Dims(2);
|
||||
for (int batch = 0; batch < batches; ++batch) {
|
||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
const int in_y = in_y_origin + dilation_height_factor * filter_y;
|
||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||
|
||||
// Zero padding by omitting the areas outside the image.
|
||||
const bool is_point_inside_image =
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
|
||||
if (!is_point_inside_image) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
|
||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||
const int in_y =
|
||||
in_y_origin + dilation_height_factor * filter_y;
|
||||
// Zero padding by omitting the areas outside the image.
|
||||
const bool is_point_inside_image =
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
int32_t input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val =
|
||||
filter_data[Offset(filter_shape, out_channel, filter_y,
|
||||
filter_x, in_channel)];
|
||||
// Accumulate with 32 bits accumulator.
|
||||
// In the nudging process during model quantization, we force
|
||||
// real value of 0.0 be represented by a quantized value. This
|
||||
// guarantees that the input_offset is a int8, even though it
|
||||
// is represented using int32.
|
||||
// int32 += int8 * (int8 - int8) so the highest value we can
|
||||
// get from each accumulation is [-127, 127] * ([-128, 127] -
|
||||
// [-128, 127]), which is [-32512, 32512]. log2(32512)
|
||||
// = 14.98, which means we can accumulate at least 2^16
|
||||
// multiplications without overflow. The accumulator is
|
||||
// applied to a filter so the accumulation logic will hold as
|
||||
// long as the filter size (filter_y * filter_x * in_channel)
|
||||
// does not exceed 2^16, which is the case in all the models
|
||||
// we have seen so far.
|
||||
// TODO(jianlijianli): Add a check to make sure the
|
||||
// accumulator depth is smaller than 2^16.
|
||||
acc += filter_val * (input_val + input_offset);
|
||||
}
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, out_channel, filter_y, filter_x, in_channel)];
|
||||
// Accumulate with 32 bits accumulator.
|
||||
// In the nudging process during model quantization, we force
|
||||
// real value of 0.0 be represented by a quantized value. This
|
||||
// guarantees that the input_offset is a int8_t, even though
|
||||
// it is represented using int32_t. int32_t += int8_t *
|
||||
// (int8_t - int8_t) so the highest value we can get from each
|
||||
// accumulation is [-127, 127] * ([-128, 127] -
|
||||
// [-128, 127]), which is [-32512, 32512]. log2(32512)
|
||||
// = 14.98, which means we can accumulate at least 2^16
|
||||
// multiplications without overflow. The accumulator is
|
||||
// applied to a filter so the accumulation logic will hold as
|
||||
// long as the filter size (filter_y * filter_x * in_channel)
|
||||
// does not exceed 2^16, which is the case in all the models
|
||||
// we have seen so far.
|
||||
// TODO(jianlijianli): Add a check to make sure the
|
||||
// accumulator depth is smaller than 2^16.
|
||||
acc += filter_val * (input_val + input_offset);
|
||||
}
|
||||
}
|
||||
}
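A compile-time restatement of the headroom argument in the comment above, using its per-step magnitude bound of 32512 (= 127 * 256):

#include <cstdint>
#include <limits>

// Worst-case magnitude of one int8 MAC term, as bounded in the comment above.
constexpr int32_t kMaxStep = 127 * 256;  // 32512
// Number of such steps that fit in an int32 accumulator without overflow.
constexpr int64_t kSafeSteps = std::numeric_limits<int32_t>::max() / kMaxStep;
static_assert(kSafeSteps >= (1 << 16),
              "an int32 accumulator holds at least 2^16 worst-case int8 MACs");
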
|
||||
@@ -125,12 +127,12 @@ inline void ConvPerChannel(
|
||||
// Fixed-point per-channel-quantization convolution reference kernel.
|
||||
// 16-bit data and 8-bit filter
|
||||
inline void ConvPerChannel(
|
||||
const ConvParams& params, const int32* output_multiplier,
|
||||
const int32* output_shift, const RuntimeShape& input_shape,
|
||||
const int16* input_data, const RuntimeShape& filter_shape,
|
||||
const int8* filter_data, const RuntimeShape& bias_shape,
|
||||
const ConvParams& params, const int32_t* output_multiplier,
|
||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
||||
const int16_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const std::int64_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
int16_t* output_data) {
|
||||
// Get parameters.
|
||||
const int stride_width = params.stride_width;
|
||||
const int stride_height = params.stride_height;
|
||||
@@ -140,10 +142,10 @@ inline void ConvPerChannel(
|
||||
const int pad_height = params.padding_values.height;
|
||||
|
||||
// Set min and max value of the output.
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
// Sanity check.
|
||||
// Consistency check.
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
|
||||
@@ -164,35 +166,37 @@ inline void ConvPerChannel(
|
||||
const int output_width = output_shape.Dims(2);
|
||||
for (int batch = 0; batch < batches; ++batch) {
|
||||
for (int out_y = 0; out_y < output_height; ++out_y) {
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
for (int out_x = 0; out_x < output_width; ++out_x) {
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
std::int64_t acc = 0;
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
const int in_y = in_y_origin + dilation_height_factor * filter_y;
|
||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||
|
||||
// Zero padding by omitting the areas outside the image.
|
||||
const bool is_point_inside_image =
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
|
||||
if (!is_point_inside_image) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
|
||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||
const int in_y =
|
||||
in_y_origin + dilation_height_factor * filter_y;
|
||||
// Zero padding by omitting the areas outside the image.
|
||||
const bool is_point_inside_image =
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
int32_t input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val =
|
||||
filter_data[Offset(filter_shape, out_channel, filter_y,
|
||||
filter_x, in_channel)];
|
||||
// Accumulate with 64 bits accumulator.
|
||||
// int64 += int8 * int16 so the highest value we can
|
||||
// get from each accumulation is [-127, 127] * ([-32768,
|
||||
// 32767] -
|
||||
// [-32768, 32767]), which is [-8322945, 8322945].
|
||||
// log2(8322945) = 22.99.
|
||||
acc += filter_val * input_val;
|
||||
}
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, out_channel, filter_y, filter_x, in_channel)];
|
||||
// Accumulate with 64 bits accumulator.
|
||||
// int64_t += int8_t * int16_t so the highest value we can
|
||||
// get from each accumulation is [-127, 127] * ([-32768,
|
||||
// 32767] -
|
||||
// [-32768, 32767]), which is [-8322945, 8322945].
|
||||
// log2(8322945) = 22.99.
|
||||
acc += filter_val * input_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,12 +20,12 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
inline void DepthwiseConvPerChannel(
|
||||
const DepthwiseParams& params, const int32* output_multiplier,
|
||||
const int32* output_shift, const RuntimeShape& input_shape,
|
||||
const int8* input_data, const RuntimeShape& filter_shape,
|
||||
const int8* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
int8* output_data) {
|
||||
const DepthwiseParams& params, const int32_t* output_multiplier,
|
||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
// Get parameters.
|
||||
// TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
|
||||
const int stride_width = params.stride_width;
|
||||
@@ -35,10 +35,10 @@ inline void DepthwiseConvPerChannel(
|
||||
const int pad_width = params.padding_values.width;
|
||||
const int pad_height = params.padding_values.height;
|
||||
const int depth_multiplier = params.depth_multiplier;
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
// Check dimensions of the tensors.
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@@ -66,7 +66,7 @@ inline void DepthwiseConvPerChannel(
|
||||
const int output_channel = m + in_channel * depth_multiplier;
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||
@@ -77,17 +77,17 @@ inline void DepthwiseConvPerChannel(
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val = filter_data[Offset(
|
||||
int32_t input_val = input_data[Offset(
|
||||
input_shape, batch, in_y, in_x, in_channel)];
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
||||
// Accumulate with 32 bits accumulator.
|
||||
// In the nudging process during model quantization, we force
|
||||
// real value of 0.0 be represented by a quantized value. This
|
||||
// guarantees that the input_offset is a int8, even though it
|
||||
// is represented using int32.
|
||||
// int32 += int8 * (int8 - int8) so the highest value we can
|
||||
// get from each accumulation is [-127, 127] * ([-128, 127] -
|
||||
// guarantees that the input_offset is a int8_t, even though
|
||||
// it is represented using int32_t. int32_t += int8_t *
|
||||
// (int8_t - int8_t) so the highest value we can get from each
|
||||
// accumulation is [-127, 127] * ([-128, 127] -
|
||||
// [-128, 127]), which is [-32512, 32512]. log2(32512)
|
||||
// = 14.98, which means we can accumulate at least 2^16
|
||||
// multiplications without overflow. The accumulator is
|
||||
@@ -120,12 +120,12 @@ inline void DepthwiseConvPerChannel(
|
||||
}
|
||||
|
||||
inline void DepthwiseConvPerChannel(
|
||||
const DepthwiseParams& params, const int32* output_multiplier,
|
||||
const int32* output_shift, const RuntimeShape& input_shape,
|
||||
const int16* input_data, const RuntimeShape& filter_shape,
|
||||
const int8* filter_data, const RuntimeShape& bias_shape,
|
||||
const DepthwiseParams& params, const int32_t* output_multiplier,
|
||||
const int32_t* output_shift, const RuntimeShape& input_shape,
|
||||
const int16_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const std::int64_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
int16_t* output_data) {
|
||||
// Get parameters.
|
||||
const int stride_width = params.stride_width;
|
||||
const int stride_height = params.stride_height;
|
||||
@@ -134,8 +134,8 @@ inline void DepthwiseConvPerChannel(
|
||||
const int pad_width = params.padding_values.width;
|
||||
const int pad_height = params.padding_values.height;
|
||||
const int depth_multiplier = params.depth_multiplier;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
|
||||
// Check dimensions of the tensors.
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@@ -174,9 +174,9 @@ inline void DepthwiseConvPerChannel(
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val = filter_data[Offset(
|
||||
int32_t input_val = input_data[Offset(
|
||||
input_shape, batch, in_y, in_x, in_channel)];
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
||||
// Accumulate with 64 bits accumulator.
|
||||
// We assume maximum of 2^16 accumulations as with the 8-bit
|
||||
@@ -190,7 +190,7 @@ inline void DepthwiseConvPerChannel(
|
||||
if (bias_data) {
|
||||
acc += bias_data[output_channel];
|
||||
}
|
||||
int32 scaled_acc = MultiplyByQuantizedMultiplier(
|
||||
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
|
||||
acc, output_multiplier[output_channel],
|
||||
output_shift[output_channel]);
|
||||
scaled_acc = std::max(scaled_acc, output_activation_min);
|
||||
@@ -207,8 +207,8 @@ inline void DepthwiseConvPerChannel(
|
||||
|
||||
inline void DepthwiseConvHybridPerChannel(
|
||||
const DepthwiseParams& params, float* scaling_factors_ptr,
|
||||
const RuntimeShape& input_shape, const int8* input_data,
|
||||
const RuntimeShape& filter_shape, const int8* filter_data,
|
||||
const RuntimeShape& input_shape, const int8_t* input_data,
|
||||
const RuntimeShape& filter_shape, const int8_t* filter_data,
|
||||
const RuntimeShape& bias_shape, const float* bias_data,
|
||||
const RuntimeShape& output_shape, float* output_data,
|
||||
const float* per_channel_scale, int32_t* input_offset) {
|
||||
@@ -247,7 +247,7 @@ inline void DepthwiseConvHybridPerChannel(
|
||||
const int output_channel = m + in_channel * depth_multiplier;
|
||||
const int in_x_origin = (out_x * stride_width) - pad_width;
|
||||
const int in_y_origin = (out_y * stride_height) - pad_height;
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
|
||||
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
|
||||
const int in_x = in_x_origin + dilation_width_factor * filter_x;
|
||||
@@ -258,9 +258,9 @@ inline void DepthwiseConvHybridPerChannel(
|
||||
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
|
||||
(in_y < input_height);
|
||||
if (is_point_inside_image) {
|
||||
int32 input_val = input_data[Offset(input_shape, batch, in_y,
|
||||
in_x, in_channel)];
|
||||
int32 filter_val = filter_data[Offset(
|
||||
int32_t input_val = input_data[Offset(
|
||||
input_shape, batch, in_y, in_x, in_channel)];
|
||||
int32_t filter_val = filter_data[Offset(
|
||||
filter_shape, 0, filter_y, filter_x, output_channel)];
|
||||
acc += filter_val * (input_val - input_offset[batch]);
|
||||
}
|
||||
|
||||
@@ -24,15 +24,15 @@ inline void FullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32* bias_data, const RuntimeShape& output_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
const int32 input_offset = params.input_offset;
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_offset = params.output_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
|
||||
|
||||
@@ -44,10 +44,10 @@ inline void FullyConnected(
|
||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int32 input_val = input_data[b * accum_depth + d];
|
||||
int32 filter_val = filter_data[out_c * accum_depth + d];
|
||||
int32_t input_val = input_data[b * accum_depth + d];
|
||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
||||
acc += (filter_val + filter_offset) * (input_val + input_offset);
|
||||
}
|
||||
if (bias_data) {
|
||||
@@ -68,11 +68,11 @@ inline void FullyConnected(
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int64_t* bias_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
const int32 filter_offset = params.weights_offset;
|
||||
const int32 output_multiplier = params.output_multiplier;
|
||||
const int32_t filter_offset = params.weights_offset;
|
||||
const int32_t output_multiplier = params.output_multiplier;
|
||||
const int output_shift = params.output_shift;
|
||||
const int32 output_activation_min = params.quantized_activation_min;
|
||||
const int32 output_activation_max = params.quantized_activation_max;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
|
||||
|
||||
@@ -86,8 +86,8 @@ inline void FullyConnected(
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
int64_t acc = 0;
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int32 input_val = input_data[b * accum_depth + d];
|
||||
int32 filter_val = filter_data[out_c * accum_depth + d];
|
||||
int32_t input_val = input_data[b * accum_depth + d];
|
||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
||||
acc += (filter_val + filter_offset) * input_val;
|
||||
}
|
||||
if (bias_data) {
|
||||
|
||||
@@ -21,8 +21,8 @@ namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
|
||||
int32_t depth, const int8* input_data,
|
||||
int8* output_data) {
|
||||
int32_t depth, const int8_t* input_data,
|
||||
int8_t* output_data) {
|
||||
static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min();
|
||||
static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max();
|
||||
// The output scale must be in sync with Prepare().
|
||||
@@ -30,7 +30,7 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
|
||||
// to [-1, 127/128].
|
||||
static constexpr int32_t kOutputScale = 7;
|
||||
for (int outer_index = 0; outer_index < outer_size; ++outer_index) {
|
||||
// int32 = (int8 - int8) ^ 2.
|
||||
// int32_t = (int8_t - int8_t) ^ 2.
|
||||
// ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is
|
||||
// safe from overflowing in at least 2^16 steps.
|
||||
int32_t acc = 0;
|
||||
@@ -55,7 +55,7 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
|
||||
std::min(static_cast<int32_t>(kMaxInt8),
|
||||
std::max(static_cast<int32_t>(kMinInt8), output_in_q24));
|
||||
output_data[depth * outer_index + inner_index] =
|
||||
static_cast<int8>(output_in_q24);
|
||||
static_cast<int8_t>(output_in_q24);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,12 +58,15 @@ inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
}
}

inline void Logistic(int32_t input_size, const int16_t* ptr_input_data,
int16_t* ptr_output_data) {
inline void Logistic(int32_t input_multiplier, int32_t input_size,
const int16_t* ptr_input_data, int16_t* ptr_output_data) {
// We use the LUT for sigmoid and take into account, that
// tanh(x) = 2*sigmoid(2*x) - 1

int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;

for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
int32_t input_data = *ptr_input_data;
int32_t input_data = (*ptr_input_data) * input_data_mul;

// Scale by 3/4 to expand range [-8,8]->[-10.7,10.7] and
// we do interpolation on unsigned values.
@@ -72,13 +75,20 @@ inline void Logistic(int32_t input_size, const int16_t* ptr_input_data,
// We divide by 2 power of 9, because
// we need to divide by 2 in power of 7 for
// the input conversion + 1/4 from the scale above.
uint8_t uh = abs_input_data >> 9;
uint32_t ua = sigmoid_table_uint16[uh];
uint32_t ub = sigmoid_table_uint16[uh + 1];
uint32_t ut = abs_input_data & 0x1ff;
// Define uh as uint32_t type not to make this function overflow.
uint32_t uh = abs_input_data >> 9;
uint32_t result;

// Interpolation is done using the fractional bit.
uint32_t result = (ua << 9) + ut * (ub - ua);
if (uh >= 255) {
// Saturate to maximum.
result = 0x7FFF << 10;
} else {
uint32_t ua = sigmoid_table_uint16[uh];
uint32_t ub = sigmoid_table_uint16[uh + 1];
uint32_t ut = abs_input_data & 0x1ff;
// Interpolation is done using the fractional bit.
result = (ua << 9) + ut * (ub - ua);
}

result = (input_data >= 0) ? (result + (1 << 9))
: ((1 << (16 + 9)) - result + (1 << 9) - 1);

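A small standalone illustration of the table interpolation above: with a 9-bit fraction ut between neighbouring LUT entries ua and ub, (ua << 9) + ut * (ub - ua) is linear interpolation carried with 9 extra fractional bits. The table values below are made up, not entries of the real sigmoid table:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical neighbouring table entries and a fractional position.
  const uint32_t ua = 1000, ub = 1100;  // made-up LUT values
  const uint32_t ut = 256;              // halfway between entries (256 / 512)
  const uint32_t result = (ua << 9) + ut * (ub - ua);
  // Dropping the 9 interpolation bits lands exactly on the midpoint.
  assert((result >> 9) == 1050);
  return 0;
}
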
@@ -0,0 +1,77 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
template <typename integer_type>
inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
int32_t shift, const RuntimeShape& unextended_input_shape,
const integer_type* input_data, int32_t input_zero_point,
const RuntimeShape& unextended_output_shape,
integer_type* output_data, int32_t output_zero_point) {
// Current implementation only supports dimension equals 4 and simultaneous
// reduction over width and height.
TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
TFLITE_CHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int output_batch = output_shape.Dims(0);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int output_depth = output_shape.Dims(3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int num_elements_in_axis = input_width * input_height;

TFLITE_CHECK_EQ(op_params.axis_count, 2);
TFLITE_CHECK((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
TFLITE_CHECK_EQ(output_height, 1);
TFLITE_CHECK_EQ(output_width, 1);

static constexpr int32_t kMinInt = std::numeric_limits<integer_type>::min();
static constexpr int32_t kMaxInt = std::numeric_limits<integer_type>::max();

for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
int32_t acc = 0;
for (int in_h = 0; in_h < input_height; ++in_h) {
for (int in_w = 0; in_w < input_width; ++in_w) {
acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] -
input_zero_point;
}
}
acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
: (acc - num_elements_in_axis / 2) / num_elements_in_axis;
acc += output_zero_point;
acc = std::min(std::max(acc, kMinInt), kMaxInt);
output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
static_cast<integer_type>(acc);
}
}
}

} // namespace reference_integer_ops
} // namespace tflite

#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_

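A standalone check of the rounded integer division used above for the mean, acc > 0 ? (acc + n/2)/n : (acc - n/2)/n, which rounds to nearest with halves going away from zero; the divisor below is an arbitrary example:

#include <cassert>
#include <cstdint>

// Same rounding expression as the Mean kernel above.
int32_t RoundedDiv(int32_t acc, int32_t n) {
  return acc > 0 ? (acc + n / 2) / n : (acc - n / 2) / n;
}

int main() {
  const int32_t n = 4;              // e.g. a 2x2 spatial reduction
  assert(RoundedDiv(5, n) == 1);    // 1.25  -> 1
  assert(RoundedDiv(6, n) == 2);    // 1.5   -> 2 (half away from zero)
  assert(RoundedDiv(-6, n) == -2);  // -1.5  -> -2
  assert(RoundedDiv(-5, n) == -1);  // -1.25 -> -1
  return 0;
}
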
@@ -27,14 +27,14 @@ inline void MulElementwise(int size, const ArithmeticParams& params,
|
||||
const T* input1_data, const T* input2_data,
|
||||
T* output_data) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 unclamped_result =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(input1_val * input2_val,
|
||||
params.output_multiplier,
|
||||
params.output_shift);
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[i] = static_cast<T>(clamped_output);
|
||||
@@ -57,13 +57,13 @@ inline void Mul(const ArithmeticParams& params,
|
||||
|
||||
// Mul with 16 bit inputs and int8_t outputs.
|
||||
inline void Mul(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int16* input1_data,
|
||||
const RuntimeShape& input2_shape, const int16* input2_data,
|
||||
const RuntimeShape& input1_shape, const int16_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const int16_t* input2_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("Mul/Int16Int8");
|
||||
int32 output_offset = params.output_offset;
|
||||
int32 output_activation_min = params.quantized_activation_min;
|
||||
int32 output_activation_max = params.quantized_activation_max;
|
||||
int32_t output_offset = params.output_offset;
|
||||
int32_t output_activation_min = params.quantized_activation_min;
|
||||
int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
|
||||
const int flat_size =
|
||||
@@ -75,12 +75,12 @@ inline void Mul(const ArithmeticParams& params,
|
||||
|
||||
F0 unclamped_result =
|
||||
F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
|
||||
int16 rescaled_result =
|
||||
int16_t rescaled_result =
|
||||
gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
|
||||
int16 clamped_result =
|
||||
std::min<int16>(output_activation_max - output_offset, rescaled_result);
|
||||
clamped_result =
|
||||
std::max<int16>(output_activation_min - output_offset, clamped_result);
|
||||
int16_t clamped_result = std::min<int16_t>(
|
||||
output_activation_max - output_offset, rescaled_result);
|
||||
clamped_result = std::max<int16_t>(output_activation_min - output_offset,
|
||||
clamped_result);
|
||||
output_data[i] = output_offset + clamped_result;
|
||||
}
|
||||
}
|
||||
@@ -104,18 +104,18 @@ inline void BroadcastMul4DSlow(
|
||||
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
||||
const int32 input1_val =
|
||||
const int32_t input1_val =
|
||||
params.input1_offset +
|
||||
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
|
||||
const int32 input2_val =
|
||||
const int32_t input2_val =
|
||||
params.input2_offset +
|
||||
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
|
||||
const int32 unclamped_result =
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(input1_val * input2_val,
|
||||
params.output_multiplier,
|
||||
params.output_shift);
|
||||
const int32 clamped_output = std::min(
|
||||
const int32_t clamped_output = std::min(
|
||||
params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[Offset(extended_output_shape, b, y, x, c)] =
|
||||
|
||||
@@ -22,8 +22,9 @@ namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
inline void AveragePool(const PoolParams& params,
|
||||
const RuntimeShape& input_shape, const int8* input_data,
|
||||
const RuntimeShape& output_shape, int8* output_data) {
|
||||
const RuntimeShape& input_shape,
|
||||
const int8_t* input_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@@ -52,7 +53,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
const int filter_y_start = std::max(0, -in_y_origin);
|
||||
const int filter_y_end =
|
||||
std::min(params.filter_height, input_height - in_y_origin);
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
int filter_count = 0;
|
||||
for (int filter_y = filter_y_start; filter_y < filter_y_end;
|
||||
++filter_y) {
|
||||
@@ -71,7 +72,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
acc = std::max(acc, params.quantized_activation_min);
|
||||
acc = std::min(acc, params.quantized_activation_max);
|
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<int8>(acc);
|
||||
static_cast<int8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -79,8 +80,8 @@ inline void AveragePool(const PoolParams& params,
|
||||
}
|
||||
|
||||
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
const int8* input_data, const RuntimeShape& output_shape,
|
||||
int8* output_data) {
|
||||
const int8_t* input_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_GE(params.quantized_activation_min,
|
||||
@@ -137,8 +138,9 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
|
||||
inline void AveragePool(const PoolParams& params,
|
||||
const RuntimeShape& input_shape,
|
||||
const int16* input_data,
|
||||
const RuntimeShape& output_shape, int16* output_data) {
|
||||
const int16_t* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@@ -167,7 +169,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
const int filter_y_start = std::max(0, -in_y_origin);
|
||||
const int filter_y_end =
|
||||
std::min(params.filter_height, input_height - in_y_origin);
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
int filter_count = 0;
|
||||
for (int filter_y = filter_y_start; filter_y < filter_y_end;
|
||||
++filter_y) {
|
||||
@@ -186,7 +188,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
acc = std::max(acc, params.quantized_activation_min);
|
||||
acc = std::min(acc, params.quantized_activation_max);
|
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<int16>(acc);
|
||||
static_cast<int16_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -194,8 +196,8 @@ inline void AveragePool(const PoolParams& params,
|
||||
}
|
||||
|
||||
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
const int16* input_data, const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
const int16_t* input_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_GE(params.quantized_activation_min,
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
inline void Tanh(int32_t input_zero_point, int32_t input_range_radius,
|
||||
int32_t input_multiplier, int32_t input_shift,
|
||||
const RuntimeShape& input_shape, const int8_t* input_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
// Integer bits must be in sync with Prepare() function.
|
||||
static constexpr int32_t kInputIntegerBits = 4;
|
||||
static constexpr int32_t kOutputScale = 7;
|
||||
static constexpr int32_t kMinInt8 = std::numeric_limits<int8_t>::min();
|
||||
static constexpr int32_t kMaxInt8 = std::numeric_limits<int8_t>::max();
|
||||
using F4 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
|
||||
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const int32_t input =
|
||||
static_cast<int32_t>(input_data[i]) - input_zero_point;
|
||||
if (input <= -input_range_radius) {
|
||||
output_data[i] = kMinInt8;
|
||||
} else if (input >= input_range_radius) {
|
||||
output_data[i] = kMaxInt8;
|
||||
} else {
|
||||
const int32_t input_in_q4 =
|
||||
MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift);
|
||||
const int32_t output_in_q0 =
|
||||
gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw();
|
||||
|
||||
// Rescale and downcast.
|
||||
using gemmlowp::RoundingDivideByPOT;
|
||||
int32_t output_in_q24 =
|
||||
RoundingDivideByPOT(output_in_q0, 31 - kOutputScale);
|
||||
output_in_q24 = std::min(std::max(output_in_q24, kMinInt8), kMaxInt8);
|
||||
output_data[i] = static_cast<int8_t>(output_in_q24);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
|
||||
const RuntimeShape& input_shape, const int16_t* ptr_input_data,
|
||||
const RuntimeShape& output_shape, int16_t* ptr_output_data) {
|
||||
// We use the LUT for sigmoid and take into account, that
|
||||
// tanh(x) = 2*sigmoid(2*x) - 1
|
||||
|
||||
int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
|
||||
|
||||
int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) {
|
||||
int32_t input_data = (*ptr_input_data) * input_data_mul;
|
||||
|
||||
if (input_left_shift == 1) {
|
||||
input_data <<= 1;
|
||||
}
|
||||
|
||||
// Scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
|
||||
uint32_t abs_input_data = 3 * abs(input_data);
|
||||
uint32_t uh = abs_input_data >> 8;
|
||||
int32_t result;
|
||||
|
||||
if (uh >= 255) {
|
||||
// Saturate to maximum.
|
||||
result = 0xFFFF << 8;
|
||||
} else {
|
||||
uint32_t ua = sigmoid_table_uint16[uh];
|
||||
uint32_t ub = sigmoid_table_uint16[uh + 1];
|
||||
|
||||
uint8_t ut = abs_input_data & 0xFF;
|
||||
|
||||
result = (ua << 8) + ut * (ub - ua);
|
||||
}
|
||||
|
||||
result = (input_data >= 0)
|
||||
? (result - (1 << (14 + 9)) + (1 << (9 - 2)))
|
||||
: (-result + (1 << (14 + 9)) + (1 << (9 - 2)) - 1);
|
||||
|
||||
// Convert back to 16-bit.
|
||||
result >>= (9 - 1);
|
||||
|
||||
*ptr_output_data = result;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_integer_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
|
||||
@@ -52,40 +52,39 @@ inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
|
||||
|
||||
inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
|
||||
const RuntimeShape& input_shape,
|
||||
const uint8* input_data,
|
||||
const uint8_t* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
uint8_t* output_data) {
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
const int32 input_zero_point = op_params.input_zero_point;
|
||||
const int32_t input_zero_point = op_params.input_zero_point;
|
||||
|
||||
for (int i = 0; i < outer_size; ++i) {
|
||||
int32 square_l2_norm = 0;
|
||||
int32_t square_l2_norm = 0;
|
||||
for (int c = 0; c < depth; c++) {
|
||||
int32 diff = input_data[depth * i + c] - input_zero_point;
|
||||
int32_t diff = input_data[depth * i + c] - input_zero_point;
|
||||
square_l2_norm += diff * diff;
|
||||
}
|
||||
int32 inv_l2norm_multiplier;
|
||||
int32_t inv_l2norm_multiplier;
|
||||
int inv_l2norm_shift;
|
||||
GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
|
||||
&inv_l2norm_multiplier, &inv_l2norm_shift);
|
||||
for (int c = 0; c < depth; c++) {
|
||||
int32 diff = input_data[depth * i + c] - input_zero_point;
|
||||
int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
int32_t diff = input_data[depth * i + c] - input_zero_point;
|
||||
int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
|
||||
int32 unclamped_output_val = 128 + rescaled_diff;
|
||||
int32 output_val =
|
||||
std::min(static_cast<int32>(255),
|
||||
std::max(static_cast<int32>(0), unclamped_output_val));
|
||||
output_data[depth * i + c] = static_cast<uint8>(output_val);
|
||||
int32_t unclamped_output_val = 128 + rescaled_diff;
|
||||
int32_t output_val =
|
||||
std::min(static_cast<int32_t>(255),
|
||||
std::max(static_cast<int32_t>(0), unclamped_output_val));
|
||||
output_data[depth * i + c] = static_cast<uint8_t>(output_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
|
||||
|
||||
@@ -66,8 +66,8 @@ inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
|
||||
}
|
||||
|
||||
inline void Logistic(const LogisticParams& params,
|
||||
const RuntimeShape& input_shape, const int16* input_data,
|
||||
const RuntimeShape& output_shape, int16* output_data) {
|
||||
const RuntimeShape& input_shape, const int16_t* input_data,
|
||||
const RuntimeShape& output_shape, int16_t* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
@@ -84,12 +84,12 @@ inline void Logistic(const LogisticParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
// Quantized int8 logistic activation. Cheats by dequantizing and requantizing
|
||||
// around the floating point logistic method. This implementation is slow on
|
||||
// platforms without a floating point unit.
|
||||
// Quantized int8_t logistic activation. Cheats by dequantizing and
|
||||
// requantizing around the floating point logistic method. This implementation
|
||||
// is slow on platforms without a floating point unit.
|
||||
|
||||
// TODO(b/141211002): Delete this int8 implementation once we can reuse the
|
||||
// approach used in TFLite for int8 Logistic.
|
||||
// TODO(b/141211002): Delete this int8_t implementation once we can reuse the
|
||||
// approach used in TFLite for int8_t Logistic.
|
||||
inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
|
||||
float input_scale, int input_zero_point,
|
||||
const RuntimeShape& output_shape, int8_t* output_data,
|
||||
|
||||
@@ -24,20 +24,20 @@ namespace reference_ops {
|
||||
// Element-wise mul that can often be used for inner loop of broadcast Mul as
|
||||
// well as the non-broadcast Mul.
|
||||
inline void MulElementwise(int size, const ArithmeticParams& params,
|
||||
const uint8* input1_data, const uint8* input2_data,
|
||||
uint8* output_data) {
|
||||
const uint8_t* input1_data,
|
||||
const uint8_t* input2_data, uint8_t* output_data) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 unclamped_result =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(input1_val * input2_val,
|
||||
params.output_multiplier,
|
||||
params.output_shift);
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[i] = static_cast<uint8>(clamped_output);
|
||||
output_data[i] = static_cast<uint8_t>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,9 +60,9 @@ inline void Mul(const ArithmeticParams& params,
|
||||
}
|
||||
|
||||
inline void Mul(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const uint8* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8* input2_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
const RuntimeShape& input1_shape, const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape, uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
const int flat_size =
|
||||
@@ -73,11 +73,11 @@ inline void Mul(const ArithmeticParams& params,
|
||||
|
||||
inline void BroadcastMul4DSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const uint8* input1_data,
|
||||
const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const uint8* input2_data,
|
||||
const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
uint8_t* output_data) {
|
||||
NdArrayDesc<4> desc1;
|
||||
NdArrayDesc<4> desc2;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
@@ -89,22 +89,22 @@ inline void BroadcastMul4DSlow(const ArithmeticParams& params,
|
||||
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
||||
const int32 input1_val =
|
||||
const int32_t input1_val =
|
||||
params.input1_offset +
|
||||
input1_data[SubscriptToIndex(desc1, b, y, x, c)];
|
||||
const int32 input2_val =
|
||||
const int32_t input2_val =
|
||||
params.input2_offset +
|
||||
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
|
||||
const int32 unclamped_result =
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(input1_val * input2_val,
|
||||
params.output_multiplier,
|
||||
params.output_shift);
|
||||
const int32 clamped_output = std::min(
|
||||
const int32_t clamped_output = std::min(
|
||||
params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[Offset(extended_output_shape, b, y, x, c)] =
|
||||
static_cast<uint8>(clamped_output);
|
||||
static_cast<uint8_t>(clamped_output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,8 +32,8 @@ constexpr int PadKernelMaxDimensionCount() { return 4; }
|
||||
// equivalent to a simple input1_data. For Pad, it should point to a zero
|
||||
// value.
|
||||
//
|
||||
// Note that two typenames are required, so that T=P=int32 is considered a
|
||||
// specialization distinct from P=int32.
|
||||
// Note that two typenames are required, so that T=P=int32_t is considered a
|
||||
// specialization distinct from P=int32_t.
|
||||
template <typename T, typename P>
|
||||
inline void PadImpl(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape, const T* input_data,
|
||||
@@ -116,11 +116,11 @@ inline void Pad(const tflite::PadParams& op_params,
|
||||
output_data);
|
||||
}
|
||||
|
||||
// The second (pad-value) input can be int32 when, say, the first is uint8.
|
||||
// The second (pad-value) input can be int32_t when, say, the first is uint8_t.
|
||||
template <typename T>
|
||||
inline void Pad(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape, const T* input_data,
|
||||
const int32* pad_value_ptr, const RuntimeShape& output_shape,
|
||||
const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
|
||||
T* output_data) {
|
||||
const T converted_pad_value = static_cast<T>(*pad_value_ptr);
|
||||
PadImpl(op_params, input_shape, input_data, &converted_pad_value,
|
||||
@@ -130,40 +130,18 @@ inline void Pad(const tflite::PadParams& op_params,
|
||||
// This version avoids conflicting template matching.
|
||||
template <>
|
||||
inline void Pad(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape, const int32* input_data,
|
||||
const int32* pad_value_ptr, const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
const RuntimeShape& input_shape, const int32_t* input_data,
|
||||
const int32_t* pad_value_ptr, const RuntimeShape& output_shape,
|
||||
int32_t* output_data) {
|
||||
PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape,
|
||||
output_data);
|
||||
}
|
||||
|
||||
// One could make all PadImageStyle calls simply delegate the work to the
|
||||
// ordinary Pad. However, it is better that the reference code asserts false in
|
||||
// similar cases.
|
||||
template <typename T, typename P>
|
||||
inline void PadImageStyle(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape, const T* input_data,
|
||||
const P* pad_value_ptr,
|
||||
const RuntimeShape& output_shape, T* output_data) {
|
||||
TFLITE_ASSERT_FALSE;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void PadImageStyle(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const P* pad_value_ptr,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
|
||||
output_data);
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void PadImageStyle(const tflite::PadParams& op_params,
|
||||
const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const P* pad_value_ptr,
|
||||
const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
Pad(op_params, input_shape, input_data, pad_value_ptr, output_shape,
|
||||
output_data);
|
||||
}
|
||||
|
||||
@@ -78,8 +78,9 @@ inline void AveragePool(const PoolParams& params,
|
||||
|
||||
inline void AveragePool(const PoolParams& params,
|
||||
const RuntimeShape& input_shape,
|
||||
const uint8* input_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
const uint8_t* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
|
||||
@@ -108,7 +109,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
const int filter_y_start = std::max(0, -in_y_origin);
|
||||
const int filter_y_end =
|
||||
std::min(params.filter_height, input_height - in_y_origin);
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
int filter_count = 0;
|
||||
for (int filter_y = filter_y_start; filter_y < filter_y_end;
|
||||
++filter_y) {
|
||||
@@ -125,7 +126,7 @@ inline void AveragePool(const PoolParams& params,
|
||||
acc = std::max(acc, params.quantized_activation_min);
|
||||
acc = std::min(acc, params.quantized_activation_max);
|
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<uint8>(acc);
|
||||
static_cast<uint8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -237,8 +238,8 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
}
|
||||
|
||||
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
const uint8* input_data, const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
const uint8_t* input_data, const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
TFLITE_DCHECK_GE(params.quantized_activation_min, 0);
|
||||
@@ -269,7 +270,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
const int filter_y_start = std::max(0, -in_y_origin);
|
||||
const int filter_y_end =
|
||||
std::min(params.filter_height, input_height - in_y_origin);
|
||||
uint8 max = 0;
|
||||
uint8_t max = 0;
|
||||
for (int filter_y = filter_y_start; filter_y < filter_y_end;
|
||||
++filter_y) {
|
||||
for (int filter_x = filter_x_start; filter_x < filter_x_end;
|
||||
@@ -281,10 +282,10 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
|
||||
input_data[Offset(input_shape, batch, in_y, in_x, channel)]);
|
||||
}
|
||||
}
|
||||
max = std::max<uint8>(max, params.quantized_activation_min);
|
||||
max = std::min<uint8>(max, params.quantized_activation_max);
|
||||
max = std::max<uint8_t>(max, params.quantized_activation_min);
|
||||
max = std::min<uint8_t>(max, params.quantized_activation_max);
|
||||
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
|
||||
static_cast<uint8>(max);
|
||||
static_cast<uint8_t>(max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
// Broadcast prelu to output_shape for quantized uint8/int8 data.
|
||||
// Broadcast prelu to output_shape for quantized uint8_t/int8_t data.
|
||||
template <typename T>
|
||||
inline void BroadcastPrelu4DSlow(
|
||||
const PreluParams& params, const RuntimeShape& input_shape,
|
||||
@@ -44,24 +44,26 @@ inline void BroadcastPrelu4DSlow(
|
||||
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
|
||||
int output_index = Offset(extended_output_shape, b, y, x, c);
|
||||
int input_index = SubscriptToIndex(desc1, b, y, x, c);
|
||||
const int32 input_value =
|
||||
const int32_t input_value =
|
||||
params.input_offset + input_data[input_index];
|
||||
int32 output_value;
|
||||
int32_t output_value;
|
||||
if (input_value >= 0) {
|
||||
output_value = input_value;
|
||||
output_value = MultiplyByQuantizedMultiplier(
|
||||
input_value, params.output_multiplier_1, params.output_shift_1);
|
||||
} else {
|
||||
auto alpha_index = SubscriptToIndex(desc2, b, y, x, c);
|
||||
const int32 alpha_value =
|
||||
const int32_t alpha_value =
|
||||
params.alpha_offset + alpha_data[alpha_index];
|
||||
|
||||
output_value = MultiplyByQuantizedMultiplier(
|
||||
input_value * alpha_value, params.output_multiplier,
|
||||
params.output_shift);
|
||||
input_value * alpha_value, params.output_multiplier_2,
|
||||
params.output_shift_2);
|
||||
}
|
||||
output_value += params.output_offset;
|
||||
|
||||
const int32 quantized_min = std::numeric_limits<T>::min();
|
||||
const int32 quantized_max = std::numeric_limits<T>::max();
|
||||
const int32 clamped_output =
|
||||
const int32_t quantized_min = std::numeric_limits<T>::min();
|
||||
const int32_t quantized_max = std::numeric_limits<T>::max();
|
||||
const int32_t clamped_output =
|
||||
std::min(quantized_max, std::max(quantized_min, output_value));
|
||||
output_data[output_index] = static_cast<T>(clamped_output);
|
||||
}
|
||||
@@ -70,6 +72,37 @@ inline void BroadcastPrelu4DSlow(
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape,
|
||||
const T* input_data, const RuntimeShape& alpha_shape,
|
||||
const T* alpha_data, const RuntimeShape& output_shape,
|
||||
T* output_data) {
|
||||
const int32_t quantized_min = std::numeric_limits<T>::min();
|
||||
const int32_t quantized_max = std::numeric_limits<T>::max();
|
||||
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input_shape, alpha_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const int32_t input_value = params.input_offset + input_data[i];
|
||||
int32_t output_value;
|
||||
if (input_value >= 0) {
|
||||
output_value = MultiplyByQuantizedMultiplier(
|
||||
input_value, params.output_multiplier_1, params.output_shift_1);
|
||||
} else {
|
||||
const int32_t alpha_value = params.alpha_offset + alpha_data[i];
|
||||
|
||||
output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value,
|
||||
params.output_multiplier_2,
|
||||
params.output_shift_2);
|
||||
}
|
||||
output_value += params.output_offset;
|
||||
|
||||
const int32_t clamped_output =
|
||||
std::min(quantized_max, std::max(quantized_min, output_value));
|
||||
output_data[i] = static_cast<T>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
|
||||
@@ -76,6 +76,10 @@ inline bool ProcessBroadcastShapes(const RuntimeShape& shape0,
|
||||
BroadcastableOpCategory::kFirstInputBroadcastsFast &&
|
||||
params->broadcast_category !=
|
||||
BroadcastableOpCategory::kSecondInputBroadcastsFast) {
|
||||
// This is unreachable because at least one else clause in the above loop
|
||||
// must be reached.
|
||||
TFLITE_DCHECK(false);
|
||||
params->broadcast_category = BroadcastableOpCategory::kNonBroadcast;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,11 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_QUANTIZE_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
@@ -29,18 +33,18 @@ inline void AffineQuantize(const tflite::QuantizationParams& op_params,
|
||||
const InputT* input_data,
|
||||
const RuntimeShape& output_shape,
|
||||
OutputT* output_data) {
|
||||
const int32 zero_point = op_params.zero_point;
|
||||
const int32_t zero_point = op_params.zero_point;
|
||||
const double scale = op_params.scale;
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
static constexpr int32 min_val = std::numeric_limits<OutputT>::min();
|
||||
static constexpr int32 max_val = std::numeric_limits<OutputT>::max();
|
||||
static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
|
||||
static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
|
||||
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
const InputT val = input_data[i];
|
||||
int32 unclamped =
|
||||
static_cast<int32>(TfLiteRound(val / static_cast<float>(scale))) +
|
||||
int32_t unclamped =
|
||||
static_cast<int32_t>(TfLiteRound(val / static_cast<float>(scale))) +
|
||||
zero_point;
|
||||
int32 clamped = std::min(std::max(unclamped, min_val), max_val);
|
||||
int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
|
||||
output_data[i] = clamped;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,8 @@ limitations under the License.
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/max.h"
|
||||
#include "tensorflow/lite/kernels/internal/min.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
@@ -68,6 +70,9 @@ inline bool ResolveAxis(const int num_dims, const int* axis,
|
||||
// eg: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1] */
|
||||
int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
|
||||
TFLITE_DCHECK(current >= 0 && current < num_dims);
|
||||
if (current < 0 || current >= num_dims) {
|
||||
return false;
|
||||
}
|
||||
bool is_dup = false;
|
||||
for (int j = 0; j < *out_num_axis; ++j) {
|
||||
if (out_axis[j] == current) {
|
||||
@@ -127,6 +132,11 @@ inline bool ReduceGeneric(const T* input_data, const int* input_dims,
|
||||
bool keep_dims, int* temp_index, int* resolved_axis,
|
||||
T init_value,
|
||||
T reducer(const T current, const T in)) {
|
||||
// Return early when input shape has zero dim.
|
||||
for (int i = 0; i < input_num_dims; ++i) {
|
||||
if (input_dims[i] == 0) return true;
|
||||
}
|
||||
|
||||
// Reset output data.
|
||||
if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
|
||||
output_data)) {
|
||||
@@ -184,11 +194,11 @@ inline bool Mean(const T* input_data, const int* input_dims,
|
||||
}
|
||||
|
||||
// Calculate mean by dividing output_data by num of aggregated element.
|
||||
U num_elements_in_axis = 1;
|
||||
size_t num_elements_in_axis = 1;
|
||||
for (int idx = 0; idx < num_resolved_axis; ++idx) {
|
||||
size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
|
||||
// Overflow prevention.
|
||||
if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
|
||||
if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
|
||||
return false;
|
||||
}
|
||||
num_elements_in_axis *= current;
|
||||
@@ -249,9 +259,9 @@ inline void Mean(const tflite::MeanParams& op_params,
|
||||
|
||||
inline void Mean(const tflite::MeanParams& op_params,
|
||||
const RuntimeShape& unextended_input_shape,
|
||||
const uint8_t* input_data, int32 input_zero_point,
|
||||
const uint8_t* input_data, int32_t input_zero_point,
|
||||
float input_scale, const RuntimeShape& unextended_output_shape,
|
||||
uint8_t* output_data, int32 output_zero_point,
|
||||
uint8_t* output_data, int32_t output_zero_point,
|
||||
float output_scale) {
|
||||
ruy::profiler::ScopeLabel label("Mean4D/Uint8");
|
||||
|
||||
@@ -280,9 +290,9 @@ inline void Mean(const tflite::MeanParams& op_params,
|
||||
constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
|
||||
constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
|
||||
|
||||
int32 bias =
|
||||
int32_t bias =
|
||||
output_zero_point -
|
||||
static_cast<int32>(input_zero_point * input_scale / output_scale);
|
||||
static_cast<int32_t>(input_zero_point * input_scale / output_scale);
|
||||
double real_scale =
|
||||
static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
|
||||
|
||||
@@ -291,7 +301,7 @@ inline void Mean(const tflite::MeanParams& op_params,
|
||||
QuantizeMultiplier(real_scale, &multiplier, &shift);
|
||||
for (int out_b = 0; out_b < output_batch; ++out_b) {
|
||||
for (int out_d = 0; out_d < output_depth; ++out_d) {
|
||||
int32 acc = 0;
|
||||
int32_t acc = 0;
|
||||
for (int in_h = 0; in_h < input_height; ++in_h) {
|
||||
for (int in_w = 0; in_w < input_width; ++in_w) {
|
||||
acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
|
||||
@@ -310,18 +320,21 @@ inline void Mean(const tflite::MeanParams& op_params,
|
||||
// It does so in two stages, first calculates the sum of elements along the axis
|
||||
// then divides it by the number of element in axis for quantized values.
|
||||
template <typename T, typename U>
|
||||
inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
|
||||
inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
|
||||
float input_scale, const int* input_dims,
|
||||
const int input_num_dims, T* output_data,
|
||||
int32 output_zero_point, float output_scale,
|
||||
int32_t output_zero_point, float output_scale,
|
||||
const int* output_dims,
|
||||
const int output_num_dims, const int* axis,
|
||||
const int num_axis_dimensions, bool keep_dims,
|
||||
int* temp_index, int* resolved_axis, U* temp_sum,
|
||||
bool compute_sum) {
|
||||
const bool uint8_case = std::is_same<T, int8_t>::value;
|
||||
const bool uint8_case = std::is_same<T, uint8_t>::value;
|
||||
const bool int16_case = std::is_same<T, int16_t>::value;
|
||||
if (uint8_case) {
|
||||
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8");
|
||||
} else if (int16_case) {
|
||||
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16");
|
||||
} else {
|
||||
ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8");
|
||||
}
|
||||
@@ -354,11 +367,11 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
|
||||
}
|
||||
|
||||
// Calculate mean by dividing output_data by num of aggregated element.
|
||||
U num_elements_in_axis = 1;
|
||||
size_t num_elements_in_axis = 1;
|
||||
for (int idx = 0; idx < num_resolved_axis; ++idx) {
|
||||
size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
|
||||
// Overflow prevention.
|
||||
if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
|
||||
if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
|
||||
return false;
|
||||
}
|
||||
num_elements_in_axis *= current;
|
||||
@@ -368,8 +381,7 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
|
||||
const float scale = input_scale / output_scale;
|
||||
if (compute_sum) {
|
||||
// TODO(b/116341117): Eliminate float and do this completely in 8bit.
|
||||
const float bias =
|
||||
-input_zero_point * scale * num_elements_in_axis + 0.5f;
|
||||
const float bias = -input_zero_point * scale * num_elements_in_axis;
|
||||
for (size_t idx = 0; idx < num_outputs; ++idx) {
|
||||
const U value =
|
||||
static_cast<U>(TfLiteRound(temp_sum[idx] * scale + bias)) +
|
||||
@@ -377,15 +389,15 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
|
||||
output_data[idx] = static_cast<T>(value);
|
||||
}
|
||||
} else {
|
||||
const float bias = -input_zero_point * scale + 0.5f;
|
||||
const float bias = -input_zero_point * scale;
|
||||
for (size_t idx = 0; idx < num_outputs; ++idx) {
|
||||
float float_mean = static_cast<float>(temp_sum[idx]) /
|
||||
static_cast<float>(num_elements_in_axis);
|
||||
float result =
|
||||
std::min(TfLiteRound(float_mean * scale + bias) + output_zero_point,
|
||||
static_cast<float>(std::numeric_limits<T>::max()));
|
||||
result =
|
||||
std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
|
||||
float result = TfLiteMin(
|
||||
TfLiteRound(float_mean * scale + bias) + output_zero_point,
|
||||
static_cast<float>(std::numeric_limits<T>::max()));
|
||||
result = TfLiteMax(result,
|
||||
static_cast<float>(std::numeric_limits<T>::min()));
|
||||
output_data[idx] = static_cast<T>(result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,28 +17,30 @@ limitations under the License.
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
inline int32 GetNearestNeighbor(const int input_value, const int32 input_size,
|
||||
const int32 output_size,
|
||||
const bool align_corners,
|
||||
const bool half_pixel_centers) {
|
||||
inline int32_t GetNearestNeighbor(const int input_value,
|
||||
const int32_t input_size,
|
||||
const int32_t output_size,
|
||||
const bool align_corners,
|
||||
const bool half_pixel_centers) {
|
||||
const float scale =
|
||||
(align_corners && output_size > 1)
|
||||
? (input_size - 1) / static_cast<float>(output_size - 1)
|
||||
: input_size / static_cast<float>(output_size);
|
||||
const float offset = half_pixel_centers ? 0.5f : 0.0f;
|
||||
int32 output_value = std::min(
|
||||
int32_t output_value = std::min(
|
||||
align_corners
|
||||
? static_cast<int32>(std::round((input_value + offset) * scale))
|
||||
: static_cast<int32>(std::floor((input_value + offset) * scale)),
|
||||
? static_cast<int32_t>(TfLiteRound((input_value + offset) * scale))
|
||||
: static_cast<int32_t>(std::floor((input_value + offset) * scale)),
|
||||
input_size - 1);
|
||||
if (half_pixel_centers) {
|
||||
output_value = std::max(static_cast<int32>(0), output_value);
|
||||
output_value = std::max(static_cast<int32_t>(0), output_value);
|
||||
}
|
||||
return output_value;
|
||||
}
|
||||
@@ -47,7 +49,7 @@ template <typename T>
|
||||
inline void ResizeNearestNeighbor(
|
||||
const tflite::ResizeNearestNeighborParams& op_params,
|
||||
const RuntimeShape& unextended_input_shape, const T* input_data,
|
||||
const RuntimeShape& output_size_shape, const int32* output_size_data,
|
||||
const RuntimeShape& output_size_shape, const int32_t* output_size_data,
|
||||
const RuntimeShape& unextended_output_shape, T* output_data) {
|
||||
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
||||
@@ -57,16 +59,16 @@ inline void ResizeNearestNeighbor(
|
||||
const RuntimeShape output_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_output_shape);
|
||||
|
||||
int32 batches = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
int32 input_height = input_shape.Dims(1);
|
||||
int32 input_width = input_shape.Dims(2);
|
||||
int32 depth = MatchingDim(input_shape, 3, output_shape, 3);
|
||||
int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
|
||||
int32_t input_height = input_shape.Dims(1);
|
||||
int32_t input_width = input_shape.Dims(2);
|
||||
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
|
||||
|
||||
// The Tensorflow version of this op allows resize on the width and height
|
||||
// axis only.
|
||||
TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
|
||||
int32 output_height = output_size_data[0];
|
||||
int32 output_width = output_size_data[1];
|
||||
int32_t output_height = output_size_data[0];
|
||||
int32_t output_width = output_size_data[1];
|
||||
|
||||
const int col_offset = input_shape.Dims(3);
|
||||
const int row_offset = input_shape.Dims(2) * col_offset;
|
||||
@@ -76,14 +78,14 @@ inline void ResizeNearestNeighbor(
|
||||
T* output_ptr = output_data;
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int y = 0; y < output_height; ++y) {
|
||||
int32 in_y = GetNearestNeighbor(y, input_height, output_height,
|
||||
op_params.align_corners,
|
||||
op_params.half_pixel_centers);
|
||||
const T* y_input_ptr = input_ptr + in_y * row_offset;
|
||||
for (int x = 0; x < output_width; ++x) {
|
||||
int32 in_x = GetNearestNeighbor(x, input_width, output_width,
|
||||
int32_t in_y = GetNearestNeighbor(y, input_height, output_height,
|
||||
op_params.align_corners,
|
||||
op_params.half_pixel_centers);
|
||||
const T* y_input_ptr = input_ptr + in_y * row_offset;
|
||||
for (int x = 0; x < output_width; ++x) {
|
||||
int32_t in_x = GetNearestNeighbor(x, input_width, output_width,
|
||||
op_params.align_corners,
|
||||
op_params.half_pixel_centers);
|
||||
const T* x_input_ptr = y_input_ptr + in_x * col_offset;
|
||||
memcpy(output_ptr, x_input_ptr, depth * sizeof(T));
|
||||
output_ptr += depth;
|
||||
|
||||
@@ -16,7 +16,6 @@ limitations under the License.
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
|
||||
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
@@ -49,26 +48,27 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
// Compute sum.
|
||||
float sum = 0.f;
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
sum += std::exp((input_data[i * depth + c] - max) *
|
||||
static_cast<float>(params.beta));
|
||||
const float exp_c = std::exp((input_data[i * depth + c] - max) *
|
||||
static_cast<float>(params.beta));
|
||||
output_data[i * depth + c] = exp_c;
|
||||
sum += exp_c;
|
||||
}
|
||||
|
||||
// Compute result.
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
output_data[i * depth + c] = std::exp((input_data[i * depth + c] - max) *
|
||||
static_cast<float>(params.beta)) /
|
||||
sum;
|
||||
output_data[i * depth + c] = output_data[i * depth + c] / sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Quantized softmax with int8/uint8 input and int8/uint8/int16 output.
|
||||
// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t
|
||||
// output.
|
||||
template <typename InputT, typename OutputT>
|
||||
inline void Softmax(const SoftmaxParams& params,
|
||||
const RuntimeShape& input_shape, const InputT* input_data,
|
||||
const RuntimeShape& output_shape, OutputT* output_data) {
|
||||
const int32 input_beta_multiplier = params.input_multiplier;
|
||||
const int32 input_beta_left_shift = params.input_left_shift;
|
||||
const int32_t input_beta_multiplier = params.input_multiplier;
|
||||
const int32_t input_beta_left_shift = params.input_left_shift;
|
||||
const int diff_min = params.diff_min;
|
||||
// The representation chosen for the input to the exp() function is Q5.26.
|
||||
// We need to leave extra space since values that we skip might be as large as
|
||||
@@ -78,9 +78,10 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
static const int kAccumulationIntegerBits = 12;
|
||||
using FixedPointScaledDiff =
|
||||
gemmlowp::FixedPoint<int32, kScaledDiffIntegerBits>;
|
||||
using FixedPointAccum = gemmlowp::FixedPoint<int32, kAccumulationIntegerBits>;
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
|
||||
gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
|
||||
using FixedPointAccum =
|
||||
gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
|
||||
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int outer_size =
|
||||
@@ -96,10 +97,10 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
|
||||
FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
int32 input_diff =
|
||||
static_cast<int32>(input_data[i * depth + c]) - max_in_row;
|
||||
int32_t input_diff =
|
||||
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
|
||||
if (input_diff >= diff_min) {
|
||||
const int32 input_diff_rescaled =
|
||||
const int32_t input_diff_rescaled =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
input_diff, input_beta_multiplier, input_beta_left_shift);
|
||||
const FixedPointScaledDiff scaled_diff_f8 =
|
||||
@@ -114,28 +115,28 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit));
|
||||
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
int32 input_diff =
|
||||
static_cast<int32>(input_data[i * depth + c]) - max_in_row;
|
||||
int32_t input_diff =
|
||||
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
|
||||
if (input_diff >= diff_min) {
|
||||
const int32 input_diff_rescaled =
|
||||
const int32_t input_diff_rescaled =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
input_diff, input_beta_multiplier, input_beta_left_shift);
|
||||
const FixedPointScaledDiff scaled_diff_f8 =
|
||||
FixedPointScaledDiff::FromRaw(input_diff_rescaled);
|
||||
|
||||
FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8);
|
||||
int32 unsat_output = gemmlowp::RoundingDivideByPOT(
|
||||
int32_t unsat_output = gemmlowp::RoundingDivideByPOT(
|
||||
(shifted_scale * exp_in_0).raw(),
|
||||
num_bits_over_unit + 31 - (sizeof(OutputT) * 8));
|
||||
|
||||
const int32 shifted_output =
|
||||
const int32_t shifted_output =
|
||||
unsat_output +
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::min());
|
||||
static_cast<int32_t>(std::numeric_limits<OutputT>::min());
|
||||
|
||||
output_data[i * depth + c] = static_cast<OutputT>(std::max(
|
||||
std::min(shifted_output,
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::max())),
|
||||
static_cast<int32>(std::numeric_limits<OutputT>::min())));
|
||||
static_cast<int32_t>(std::numeric_limits<OutputT>::max())),
|
||||
static_cast<int32_t>(std::numeric_limits<OutputT>::min())));
|
||||
} else {
|
||||
output_data[i * depth + c] = std::numeric_limits<OutputT>::min();
|
||||
}
|
||||
@@ -143,7 +144,24 @@ inline void Softmax(const SoftmaxParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
// Quantized softmax with int16 input and int16 output.
|
||||
// Computes exp(input - max_input)
|
||||
inline int16_t SoftMaxCalculateExp(const SoftmaxParams& params,
|
||||
const int16_t* input_data, const int depth,
|
||||
int16_t max_in_row, int i, int c) {
|
||||
int32_t input_diff = input_data[i * depth + c] - max_in_row;
|
||||
// scale the input_diff such that [-65535, 0] correspond to [-10.0, 0.0]
|
||||
// exp lut generated with range [-10, 0], as exp(-10) is negligible.
|
||||
int32_t scaled_diff = MultiplyByQuantizedMultiplier(
|
||||
input_diff, params.input_multiplier, params.input_left_shift);
|
||||
// recenter to [-32768, 32767]
|
||||
int32_t sym_scaled_diff = scaled_diff + 32767;
|
||||
int16_t sat_sym_scaled_diff =
|
||||
std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
|
||||
static_cast<int32_t>(32767));
|
||||
// apply the exp() LUT activation function
|
||||
return generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
|
||||
}
|
||||
// Quantized softmax with int16_t input and int16_t output.
|
||||
inline void SoftmaxInt16(const SoftmaxParams& params,
|
||||
const RuntimeShape& input_shape,
|
||||
const int16_t* input_data,
|
||||
@@ -162,28 +180,16 @@ inline void SoftmaxInt16(const SoftmaxParams& params,
|
||||
max_in_row = std::max(max_in_row, input_data[i * depth + c]);
|
||||
}
|
||||
|
||||
// Compute exp(input - max_input)
|
||||
std::vector<int16_t> exp_result_Q015(depth);
|
||||
// This loops computes the exp values and their sum. We will need the exp
|
||||
// values later on in the function so we cache them in the output_data
|
||||
// buffer. This is an optimization done to avoid calculating the exp values
|
||||
// twice making use of the output_data buffer as scratch memory.
|
||||
int32_t sum_of_exps = 0; // Q16.15 fixed point format.
|
||||
int16_t* exp_results_Q015 = output_data + i * depth;
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
int32_t input_diff = input_data[i * depth + c] - max_in_row;
|
||||
// scale the input_diff such that [-65535, 0] correspond to [-10.0, 0.0]
|
||||
int32_t scaled_diff = MultiplyByQuantizedMultiplier(
|
||||
input_diff, params.input_multiplier, params.input_left_shift);
|
||||
// recenter to [-32768, 32767]
|
||||
int32_t sym_scaled_diff = scaled_diff + 32767;
|
||||
int16_t sat_sym_scaled_diff =
|
||||
std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
|
||||
static_cast<int32_t>(32767));
|
||||
// apply the exp() LUT activation function
|
||||
exp_result_Q015[c] =
|
||||
generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
|
||||
}
|
||||
|
||||
// sum_of_exps is a Q16.15 fixed point format.
|
||||
int32_t sum_of_exps = 0;
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
// Q16.15 + Q0.15
|
||||
sum_of_exps += exp_result_Q015[c];
|
||||
exp_results_Q015[c] =
|
||||
SoftMaxCalculateExp(params, input_data, depth, max_in_row, i, c);
|
||||
sum_of_exps += exp_results_Q015[c];
|
||||
}
|
||||
|
||||
// Compute the reciprocal 1/sum_of_exps
|
||||
@@ -209,7 +215,7 @@ inline void SoftmaxInt16(const SoftmaxParams& params,
|
||||
for (int c = 0; c < depth; ++c) {
|
||||
uint8_t right_shift = 31 - headroom_plus_one;
|
||||
int64_t round = 1 << (right_shift - 1);
|
||||
int32_t result = (static_cast<int64_t>(exp_result_Q015[c]) *
|
||||
int32_t result = (static_cast<int64_t>(exp_results_Q015[c]) *
|
||||
static_cast<int64_t>(reciprocal_scale_Q015) +
|
||||
round) >>
|
||||
right_shift;
|
||||
|
||||
@@ -16,8 +16,10 @@ limitations under the License.
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_STRIDED_SLICE_H_
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
@@ -15,9 +15,15 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SUB_H_
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -41,11 +47,11 @@ inline void SubNonBroadcast(const ArithmeticParams& params,
|
||||
|
||||
inline void SubNonBroadcast(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const int32_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const int32_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
int32_t* output_data) {
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
@@ -106,12 +112,12 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
template <int N = 5>
|
||||
inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const uint8* input1_data,
|
||||
const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const uint8* input2_data,
|
||||
const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
uint8* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8");
|
||||
uint8_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8_t");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
@@ -134,28 +140,28 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
const int32 input1_val =
|
||||
const int32_t input1_val =
|
||||
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
|
||||
const int32 input2_val =
|
||||
const int32_t input2_val =
|
||||
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
static_cast<uint8>(clamped_output);
|
||||
static_cast<uint8_t>(clamped_output);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
@@ -163,12 +169,12 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
template <int N = 5>
|
||||
inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const int32_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const int32_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32");
|
||||
int32_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
@@ -208,7 +214,7 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const int8_t* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8");
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8_t");
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
@@ -254,6 +260,45 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
|
||||
template <int N = 5>
|
||||
void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int64_t* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int64_t* input2_data,
|
||||
const RuntimeShape& output_shape, int64_t* output_data) {
|
||||
ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t");
|
||||
TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
|
||||
&desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest stride,
|
||||
// typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
//
|
||||
// We name our variables by their Tensorflow convention, but generate C code
|
||||
// nesting loops such that the innermost loop has the smallest stride for the
|
||||
// best cache behavior.
|
||||
auto sub_func = [&](int indexes[N]) {
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
ActivationFunctionWithMinMax(
|
||||
input1_data[SubscriptToIndex(desc1, indexes)] -
|
||||
input2_data[SubscriptToIndex(desc2, indexes)],
|
||||
params.int64_activation_min, params.int64_activation_max);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, sub_func);
|
||||
}
|
||||
|
||||
template <typename T, int N = 5>
|
||||
void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const T* input1_data,
|
||||
@@ -294,33 +339,33 @@ void BroadcastSubSlow(const ArithmeticParams& params,
|
||||
// Element-wise Sub that can often be used for inner loop of broadcast sub as
|
||||
// well as the non-broadcast sub.
|
||||
inline void SubElementwise(int size, const ArithmeticParams& params,
|
||||
const uint8* input1_data, const uint8* input2_data,
|
||||
uint8* output_data) {
|
||||
const uint8_t* input1_data,
|
||||
const uint8_t* input2_data, uint8_t* output_data) {
|
||||
TFLITE_DCHECK_GT(params.input1_offset, -256);
|
||||
TFLITE_DCHECK_GT(params.input2_offset, -256);
|
||||
TFLITE_DCHECK_LT(params.input1_offset, 256);
|
||||
TFLITE_DCHECK_LT(params.input2_offset, 256);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<uint8>(clamped_output);
|
||||
output_data[i] = static_cast<uint8_t>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -336,22 +381,22 @@ inline void SubElementwise(int size, const ArithmeticParams& params,
|
||||
TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
const int32 input1_val = params.input1_offset + input1_data[i];
|
||||
const int32 input2_val = params.input2_offset + input2_data[i];
|
||||
const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32 scaled_input1_val =
|
||||
const int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
const int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
|
||||
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
|
||||
const int32_t scaled_input1_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input1_val, params.input1_multiplier, params.input1_shift);
|
||||
const int32 scaled_input2_val =
|
||||
const int32_t scaled_input2_val =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
shifted_input2_val, params.input2_multiplier, params.input2_shift);
|
||||
const int32 raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32 raw_output =
|
||||
const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
|
||||
const int32_t raw_output =
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
raw_sub, params.output_multiplier, params.output_shift) +
|
||||
params.output_offset;
|
||||
const int32 clamped_output =
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, raw_output));
|
||||
output_data[i] = static_cast<int8_t>(clamped_output);
|
||||
@@ -359,9 +404,9 @@ inline void SubElementwise(int size, const ArithmeticParams& params,
|
||||
}
|
||||
|
||||
inline void Sub(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const uint8* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8* input2_data,
|
||||
const RuntimeShape& output_shape, uint8* output_data) {
|
||||
const RuntimeShape& input1_shape, const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape, uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
const int flat_size =
|
||||
@@ -428,40 +473,43 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape,
|
||||
}
|
||||
}
|
||||
|
||||
inline void SubWithActivation(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int32* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int32* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int32* output_data) {
|
||||
inline void SetActivationMinMax(const ArithmeticParams& params,
|
||||
int32_t* activation_min,
|
||||
int32_t* activation_max) {
|
||||
*activation_min = params.quantized_activation_min;
|
||||
*activation_max = params.quantized_activation_max;
|
||||
}
|
||||
|
||||
inline void SetActivationMinMax(const ArithmeticParams& params,
|
||||
float* activation_min, float* activation_max) {
|
||||
*activation_min = params.float_activation_min;
|
||||
*activation_max = params.float_activation_max;
|
||||
}
|
||||
|
||||
inline void SetActivationMinMax(const ArithmeticParams& params,
|
||||
int64_t* activation_min,
|
||||
int64_t* activation_max) {
|
||||
*activation_min = params.int64_activation_min;
|
||||
*activation_max = params.int64_activation_max;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void SubWithActivation(
|
||||
const ArithmeticParams& params, const RuntimeShape& input1_shape,
|
||||
const T* input1_data, const RuntimeShape& input2_shape,
|
||||
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
|
||||
ruy::profiler::ScopeLabel label("SubWithActivation");
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
T activation_min, activation_max;
|
||||
SetActivationMinMax(params, &activation_min, &activation_max);
|
||||
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] - input2_data[i], params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
input1_data[i] - input2_data[i], activation_min, activation_max);
|
||||
}
|
||||
}
|
||||
|
||||
inline void SubWithActivation(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const float* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const float* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
float* output_data) {
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] - input2_data[i], params.float_activation_min,
|
||||
params.float_activation_max);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace reference_ops {
|
||||
|
||||
inline void Tanh(const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
float val = input_data[i];
|
||||
float result = std::tanh(val);
|
||||
output_data[i] = result;
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience version that allows, for example, generated-code calls to be
|
||||
// uniform between data types.
|
||||
inline void Tanh(const TanhParams&, const RuntimeShape& input_shape,
|
||||
const float* input_data, const RuntimeShape& output_shape,
|
||||
float* output_data) {
|
||||
// Drop params: not needed.
|
||||
Tanh(input_shape, input_data, output_shape, output_data);
|
||||
}
|
||||
|
||||
inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
|
||||
const int16_t* input_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
const int input_left_shift = params.input_left_shift;
|
||||
// Support for shifts is limited until we have a parameterized version of
|
||||
// SaturatingRoundingMultiplyByPOT().
|
||||
TFLITE_DCHECK_GE(input_left_shift, 0);
|
||||
TFLITE_DCHECK_LE(input_left_shift, 1);
|
||||
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
// F0 uses 0 integer bits, range [-1, 1].
|
||||
// This is the return type of math functions such as tanh, logistic,
|
||||
// whose range is in [-1, 1].
|
||||
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
|
||||
// F3 uses 3 integer bits, range [-8, 8], the input range expected here.
|
||||
using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
|
||||
|
||||
if (input_left_shift == 0) {
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
F3 input = F3::FromRaw(input_data[i]);
|
||||
F0 output = gemmlowp::tanh(input);
|
||||
output_data[i] = output.raw();
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
F3 input = F3::FromRaw(
|
||||
gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i]));
|
||||
F0 output = gemmlowp::tanh(input);
|
||||
output_data[i] = output.raw();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
|
||||
const uint8_t* input_data, const RuntimeShape& output_shape,
|
||||
uint8_t* output_data) {
|
||||
const int32_t input_zero_point = params.input_zero_point;
|
||||
const int32_t input_range_radius = params.input_range_radius;
|
||||
const int32_t input_multiplier = params.input_multiplier;
|
||||
const int input_left_shift = params.input_left_shift;
|
||||
const int32_t output_zero_point = 128;
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
|
||||
for (int i = 0; i < flat_size; i++) {
|
||||
const uint8_t input_val_u8 = input_data[i];
|
||||
const int32_t input_val_centered =
|
||||
static_cast<int32_t>(input_val_u8) - input_zero_point;
|
||||
uint8_t output_val;
|
||||
if (input_val_centered <= -input_range_radius) {
|
||||
output_val = 0;
|
||||
} else if (input_val_centered >= input_range_radius) {
|
||||
output_val = 255;
|
||||
} else {
|
||||
const int32_t input_val_rescaled =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(
|
||||
input_val_centered, input_multiplier, input_left_shift);
|
||||
using FixedPoint4 = gemmlowp::FixedPoint<int32_t, 4>;
|
||||
using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
|
||||
const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled);
|
||||
const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4);
|
||||
// Convert from Q0.31 to Q24.7.
|
||||
using gemmlowp::RoundingDivideByPOT;
|
||||
int32_t output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24);
|
||||
output_val_s32 += output_zero_point;
|
||||
if (output_val_s32 == 256) {
|
||||
output_val_s32 = 255;
|
||||
}
|
||||
// Reinterpret as Q0.7, encoded in uint8_t.
|
||||
TFLITE_DCHECK_GE(output_val_s32, 0);
|
||||
TFLITE_DCHECK_LE(output_val_s32, 255);
|
||||
output_val = static_cast<uint8_t>(output_val_s32);
|
||||
}
|
||||
output_data[i] = output_val;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TANH_H_
|
||||
@@ -18,6 +18,7 @@ limitations under the License.
|
||||
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
@@ -69,8 +70,8 @@ inline void StridedSlicePadIndices(tflite::StridedSliceParams* p,
|
||||
}
|
||||
|
||||
// Return the index for the first element along that axis. This index will be a
|
||||
// positive integer between [0, axis_size - 1] that can be used to index
|
||||
// directly into the data.
|
||||
// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0)
|
||||
// that can be used to index directly into the data.
|
||||
inline int StartForAxis(const tflite::StridedSliceParams& params,
|
||||
const RuntimeShape& input_shape, int axis) {
|
||||
const auto begin_mask = params.begin_mask;
|
||||
@@ -102,7 +103,13 @@ inline int StartForAxis(const tflite::StridedSliceParams& params,
|
||||
}
|
||||
|
||||
// Clamping
|
||||
start = Clamp(start, 0, axis_size - 1);
|
||||
if (strides[axis] > 0) {
|
||||
// Forward iteration
|
||||
start = Clamp(start, 0, axis_size);
|
||||
} else {
|
||||
// Backward iteration
|
||||
start = Clamp(start, -1, axis_size - 1);
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
@@ -24,24 +24,29 @@ limitations under the License.
|
||||
|
||||
namespace tflite {
|
||||
|
||||
enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
|
||||
enum class PaddingType : uint8 { kNone, kSame, kValid };
|
||||
enum class FusedActivationFunctionType : uint8_t {
|
||||
kNone,
|
||||
kRelu6,
|
||||
kRelu1,
|
||||
kRelu
|
||||
};
|
||||
enum class PaddingType : uint8_t { kNone, kSame, kValid };
|
||||
|
||||
struct PaddingValues {
|
||||
int16 width;
|
||||
int16 height;
|
||||
int16_t width;
|
||||
int16_t height;
|
||||
// offset is used for calculating "remaining" padding, for example, `width`
|
||||
// is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is
|
||||
// 1 + 1 = 2.
|
||||
int16 width_offset;
|
||||
int16_t width_offset;
|
||||
// Same as width_offset except it's over the height dimension.
|
||||
int16 height_offset;
|
||||
int16_t height_offset;
|
||||
};
|
||||
|
||||
// This enumeration allows for non-default formats for the weights array
|
||||
// of a fully-connected operator, allowing the use of special optimized
|
||||
// runtime paths.
|
||||
enum class FullyConnectedWeightsFormat : uint8 {
|
||||
enum class FullyConnectedWeightsFormat : uint8_t {
|
||||
// Default format (flat 2D layout, the inner contiguous dimension
|
||||
// is input_depth, the outer non-contiguous dimension is output_depth)
|
||||
kDefault,
|
||||
@@ -88,11 +93,11 @@ enum class FullyConnectedWeightsFormat : uint8 {
|
||||
// maximize arithmetic throughput.
|
||||
//
|
||||
// Finally, the 'Int8' part in the name refers to the fact that this
|
||||
// weights format has each weights value encoded as a signed int8 value,
|
||||
// even if the data type of the weights buffer is uint8. This is intended
|
||||
// weights format has each weights value encoded as a signed int8_t value,
|
||||
// even if the data type of the weights buffer is uint8_t. This is intended
|
||||
// to save runtime kernels the effort to have to XOR the top bit of these
|
||||
// bytes before using them in signed arithmetic, see this file for more
|
||||
// explanations on the 'signed int8 trick' in matrix multiplication kernels:
|
||||
// explanations on the 'signed int8_t trick' in matrix multiplication kernels:
|
||||
//
|
||||
// tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc
|
||||
//
|
||||
@@ -111,7 +116,7 @@ enum class FullyConnectedWeightsFormat : uint8 {
|
||||
// the real 0 value, and scale designates the difference between the real values
|
||||
// corresponding to consecutive quantized values differing by 1.
|
||||
struct QuantizationParams {
|
||||
int32 zero_point = 0;
|
||||
int32_t zero_point = 0;
|
||||
double scale = 0.0;
|
||||
};
|
||||
|
||||
@@ -140,20 +145,20 @@ class RuntimeShape {
|
||||
if (dimensions_count > kMaxSmallSize) {
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
TFLITE_CHECK(false && "No shape resizing supported on this platform");
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
dims_pointer_ = new int32[dimensions_count];
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
dims_pointer_ = new int32_t[dimensions_count];
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
}
|
||||
}
|
||||
|
||||
RuntimeShape(int shape_size, int32 value) : size_(0) {
|
||||
RuntimeShape(int shape_size, int32_t value) : size_(0) {
|
||||
Resize(shape_size);
|
||||
for (int i = 0; i < shape_size; ++i) {
|
||||
SetDim(i, value);
|
||||
}
|
||||
}
|
||||
|
||||
RuntimeShape(int dimensions_count, const int32* dims_data) : size_(0) {
|
||||
RuntimeShape(int dimensions_count, const int32_t* dims_data) : size_(0) {
|
||||
ReplaceWith(dimensions_count, dims_data);
|
||||
}
|
||||
|
||||
@@ -165,33 +170,34 @@ class RuntimeShape {
|
||||
// rolls out.
|
||||
RuntimeShape(RuntimeShape const& other) : size_(other.DimensionsCount()) {
|
||||
if (size_ > kMaxSmallSize) {
|
||||
dims_pointer_ = new int32[size_];
|
||||
dims_pointer_ = new int32_t[size_];
|
||||
}
|
||||
std::memcpy(DimsData(), other.DimsData(), sizeof(int32) * size_);
|
||||
std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * size_);
|
||||
}
|
||||
|
||||
bool operator==(const RuntimeShape& comp) const {
|
||||
return this->size_ == comp.size_ &&
|
||||
std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32)) == 0;
|
||||
std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) ==
|
||||
0;
|
||||
}
|
||||
|
||||
~RuntimeShape() {
|
||||
if (size_ > kMaxSmallSize) {
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
TFLITE_CHECK(false && "No shape resizing supported on this platform");
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
delete[] dims_pointer_;
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
}
|
||||
}
|
||||
|
||||
inline int32 DimensionsCount() const { return size_; }
|
||||
inline int32 Dims(int i) const {
|
||||
inline int32_t DimensionsCount() const { return size_; }
|
||||
inline int32_t Dims(int i) const {
|
||||
TFLITE_DCHECK_GE(i, 0);
|
||||
TFLITE_DCHECK_LT(i, size_);
|
||||
return size_ > kMaxSmallSize ? dims_pointer_[i] : dims_[i];
|
||||
}
|
||||
inline void SetDim(int i, int32 val) {
|
||||
inline void SetDim(int i, int32_t val) {
|
||||
TFLITE_DCHECK_GE(i, 0);
|
||||
TFLITE_DCHECK_LT(i, size_);
|
||||
if (size_ > kMaxSmallSize) {
|
||||
@@ -201,20 +207,20 @@ class RuntimeShape {
|
||||
}
|
||||
}
|
||||
|
||||
inline int32* DimsData() {
|
||||
inline int32_t* DimsData() {
|
||||
return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
|
||||
}
|
||||
inline const int32* DimsData() const {
|
||||
inline const int32_t* DimsData() const {
|
||||
return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
|
||||
}
|
||||
// The caller must ensure that the shape is no bigger than 5-D.
|
||||
inline const int32* DimsDataUpTo5D() const { return dims_; }
|
||||
inline const int32_t* DimsDataUpTo5D() const { return dims_; }
|
||||
|
||||
inline void Resize(int dimensions_count) {
|
||||
if (size_ > kMaxSmallSize) {
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
TFLITE_CHECK(false && "No shape resizing supported on this platform");
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
delete[] dims_pointer_;
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
}
|
||||
@@ -222,16 +228,16 @@ class RuntimeShape {
|
||||
if (dimensions_count > kMaxSmallSize) {
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
TFLITE_CHECK(false && "No shape resizing supported on this platform");
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
dims_pointer_ = new int32[dimensions_count];
|
||||
#else // TF_LITE_STATIC_MEMORY
|
||||
dims_pointer_ = new int32_t[dimensions_count];
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
}
|
||||
}
|
||||
|
||||
inline void ReplaceWith(int dimensions_count, const int32* dims_data) {
|
||||
inline void ReplaceWith(int dimensions_count, const int32_t* dims_data) {
|
||||
Resize(dimensions_count);
|
||||
int32* dst_dims = DimsData();
|
||||
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32));
|
||||
int32_t* dst_dims = DimsData();
|
||||
std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@@ -239,7 +245,7 @@ class RuntimeShape {
|
||||
const int dimensions_count =
|
||||
std::distance(src_iterable.begin(), src_iterable.end());
|
||||
Resize(dimensions_count);
|
||||
int32* data = DimsData();
|
||||
int32_t* data = DimsData();
|
||||
for (auto it : src_iterable) {
|
||||
*data = it;
|
||||
++data;
|
||||
@@ -288,13 +294,13 @@ class RuntimeShape {
|
||||
SetDim(i, pad_value);
|
||||
}
|
||||
std::memcpy(DimsData() + size_increase, shape.DimsData(),
|
||||
sizeof(int32) * shape.DimensionsCount());
|
||||
sizeof(int32_t) * shape.DimensionsCount());
|
||||
}
|
||||
|
||||
int32 size_;
|
||||
int32_t size_;
|
||||
union {
|
||||
int32 dims_[kMaxSmallSize];
|
||||
int32* dims_pointer_;
|
||||
int32_t dims_[kMaxSmallSize];
|
||||
int32_t* dims_pointer_;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -432,7 +438,7 @@ int MatchingArraySize(const ArrayType1& array1, int index1,
|
||||
inline int MatchingDim(const RuntimeShape& shape1, int index1,
|
||||
const RuntimeShape& shape2, int index2) {
|
||||
TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2));
|
||||
return shape1.Dims(index1);
|
||||
return std::min(shape1.Dims(index1), shape2.Dims(index2));
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
@@ -713,7 +719,7 @@ void ComputeStrides(Dims<N>* dims) {
|
||||
}
|
||||
}
|
||||
|
||||
enum class BroadcastableOpCategory : uint8 {
|
||||
enum class BroadcastableOpCategory : uint8_t {
|
||||
kNone,
|
||||
kNonBroadcast, // Matching input shapes.
|
||||
kFirstInputBroadcastsFast, // Fivefold nested loops.
|
||||
@@ -729,21 +735,21 @@ static_assert(sizeof(MinMax) == 8, "");
|
||||
|
||||
struct ActivationParams {
|
||||
FusedActivationFunctionType activation_type;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
};
|
||||
|
||||
struct ReluParams : public ActivationParams {
|
||||
int32 input_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int32 output_shift;
|
||||
int32_t input_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
};
|
||||
|
||||
// Styles of resizing op usages. For example, kImageStyle can be used with a Pad
|
||||
// op for pattern-specific optimization.
|
||||
enum class ResizingCategory : uint8 {
|
||||
enum class ResizingCategory : uint8_t {
|
||||
kNone,
|
||||
kImageStyle, // 4D, operating on inner dimensions, say {0, a, b, 0}.
|
||||
kGenericResize,
|
||||
@@ -753,24 +759,29 @@ enum class ResizingCategory : uint8 {
|
||||
struct ArithmeticParams {
|
||||
// Shape dependent / common to data / op types.
|
||||
BroadcastableOpCategory broadcast_category;
|
||||
// uint8 inference params.
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
// uint8_t inference params.
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// Add / Sub, not Mul, uint8 inference params.
|
||||
// Add / Sub, not Mul, uint8_t inference params.
|
||||
int left_shift;
|
||||
int32 input1_multiplier;
|
||||
int32_t input1_multiplier;
|
||||
int input1_shift;
|
||||
int32 input2_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int input2_shift;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
|
||||
// TODO(b/158622529): Union the following activation params.
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
// int64_t activation params.
|
||||
int64_t int64_activation_min;
|
||||
int64_t int64_activation_max;
|
||||
|
||||
// Processed output dimensions.
|
||||
// Let input "a" be the one that broadcasts in the faster-changing dimension.
|
||||
@@ -785,22 +796,22 @@ struct ArithmeticParams {
|
||||
};
|
||||
|
||||
struct ConcatenationParams {
|
||||
int8 axis;
|
||||
const int32* input_zeropoint;
|
||||
int8_t axis;
|
||||
const int32_t* input_zeropoint;
|
||||
const float* input_scale;
|
||||
uint16 inputs_count;
|
||||
int32 output_zeropoint;
|
||||
uint16_t inputs_count;
|
||||
int32_t output_zeropoint;
|
||||
float output_scale;
|
||||
};
|
||||
|
||||
struct ComparisonParams {
|
||||
// uint8 inference params.
|
||||
// uint8_t inference params.
|
||||
int left_shift;
|
||||
int32 input1_offset;
|
||||
int32 input1_multiplier;
|
||||
int32_t input1_offset;
|
||||
int32_t input1_multiplier;
|
||||
int input1_shift;
|
||||
int32 input2_offset;
|
||||
int32 input2_multiplier;
|
||||
int32_t input2_offset;
|
||||
int32_t input2_multiplier;
|
||||
int input2_shift;
|
||||
// Shape dependent / common to inference types.
|
||||
bool is_broadcast;
|
||||
@@ -810,81 +821,81 @@ struct ConvParams {
|
||||
PaddingType padding_type;
|
||||
PaddingValues padding_values;
|
||||
// TODO(starka): This was just "stride", so check that width+height is OK.
|
||||
int16 stride_width;
|
||||
int16 stride_height;
|
||||
int16 dilation_width_factor;
|
||||
int16 dilation_height_factor;
|
||||
// uint8 inference params.
|
||||
int16_t stride_width;
|
||||
int16_t stride_height;
|
||||
int16_t dilation_width_factor;
|
||||
int16_t dilation_height_factor;
|
||||
// uint8_t inference params.
|
||||
// TODO(b/65838351): Use smaller types if appropriate.
|
||||
int32 input_offset;
|
||||
int32 weights_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int32_t input_offset;
|
||||
int32_t weights_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
};
|
||||
|
||||
struct DepthToSpaceParams {
|
||||
int32 block_size;
|
||||
int32_t block_size;
|
||||
};
|
||||
|
||||
struct DepthwiseParams {
|
||||
PaddingType padding_type;
|
||||
PaddingValues padding_values;
|
||||
int16 stride_width;
|
||||
int16 stride_height;
|
||||
int16 dilation_width_factor;
|
||||
int16 dilation_height_factor;
|
||||
int16 depth_multiplier;
|
||||
// uint8 inference params.
|
||||
int16_t stride_width;
|
||||
int16_t stride_height;
|
||||
int16_t dilation_width_factor;
|
||||
int16_t dilation_height_factor;
|
||||
int16_t depth_multiplier;
|
||||
// uint8_t inference params.
|
||||
// TODO(b/65838351): Use smaller types if appropriate.
|
||||
int32 input_offset;
|
||||
int32 weights_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int32_t input_offset;
|
||||
int32_t weights_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
const int32* output_multiplier_per_channel;
|
||||
const int32* output_shift_per_channel;
|
||||
const int32_t* output_multiplier_per_channel;
|
||||
const int32_t* output_shift_per_channel;
|
||||
};
|
||||
|
||||
struct DequantizationParams {
|
||||
double scale;
|
||||
int32 zero_point;
|
||||
int32_t zero_point;
|
||||
};
|
||||
|
||||
struct PerChannelDequantizationParams {
|
||||
const float* scale;
|
||||
const int32* zero_point;
|
||||
int32 quantized_dimension;
|
||||
const int32_t* zero_point;
|
||||
int32_t quantized_dimension;
|
||||
};
|
||||
|
||||
struct FakeQuantParams {
|
||||
MinMax minmax;
|
||||
int32 num_bits;
|
||||
int32_t num_bits;
|
||||
};
|
||||
|
||||
struct FullyConnectedParams {
|
||||
// uint8 inference params.
|
||||
// uint8_t inference params.
|
||||
// TODO(b/65838351): Use smaller types if appropriate.
|
||||
int32 input_offset;
|
||||
int32 weights_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int32_t input_offset;
|
||||
int32_t weights_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
@@ -895,16 +906,16 @@ struct FullyConnectedParams {
|
||||
};
|
||||
|
||||
struct GatherParams {
|
||||
int16 axis;
|
||||
int16_t axis;
|
||||
};
|
||||
|
||||
struct L2NormalizationParams {
|
||||
// uint8 inference params.
|
||||
int32 input_zero_point;
|
||||
// uint8_t inference params.
|
||||
int32_t input_zero_point;
|
||||
};
|
||||
|
||||
struct LocalResponseNormalizationParams {
|
||||
int32 range;
|
||||
int32_t range;
|
||||
double bias;
|
||||
double alpha;
|
||||
double beta;
|
||||
@@ -932,48 +943,50 @@ struct HardSwishParams {
|
||||
};
|
||||
|
||||
struct LogisticParams {
|
||||
// uint8 inference params.
|
||||
int32 input_zero_point;
|
||||
int32 input_range_radius;
|
||||
int32 input_multiplier;
|
||||
// uint8_t inference params.
|
||||
int32_t input_zero_point;
|
||||
int32_t input_range_radius;
|
||||
int32_t input_multiplier;
|
||||
int input_left_shift;
|
||||
};
|
||||
|
||||
struct LstmCellParams {
|
||||
int32 weights_zero_point;
|
||||
int32 accum_multiplier;
|
||||
int32_t weights_zero_point;
|
||||
int32_t accum_multiplier;
|
||||
int accum_shift;
|
||||
int state_integer_bits;
|
||||
};
|
||||
|
||||
struct MeanParams {
|
||||
int8 axis_count;
|
||||
int16 axis[4];
|
||||
int8_t axis_count;
|
||||
int16_t axis[4];
|
||||
};
|
||||
|
||||
struct PackParams {
|
||||
int8 axis;
|
||||
const int32* input_zeropoint;
|
||||
int8_t axis;
|
||||
const int32_t* input_zeropoint;
|
||||
const float* input_scale;
|
||||
uint16 inputs_count;
|
||||
int32 output_zeropoint;
|
||||
uint16_t inputs_count;
|
||||
int32_t output_zeropoint;
|
||||
float output_scale;
|
||||
};
|
||||
|
||||
struct PadParams {
|
||||
int8 left_padding_count;
|
||||
int32 left_padding[4];
|
||||
int8 right_padding_count;
|
||||
int32 right_padding[4];
|
||||
int8_t left_padding_count;
|
||||
int32_t left_padding[4];
|
||||
int8_t right_padding_count;
|
||||
int32_t right_padding[4];
|
||||
ResizingCategory resizing_category;
|
||||
};
|
||||
|
||||
struct PreluParams {
|
||||
int32 input_offset;
|
||||
int32 alpha_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier;
|
||||
int output_shift;
|
||||
int32_t input_offset;
|
||||
int32_t alpha_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier_1;
|
||||
int output_shift_1;
|
||||
int32_t output_multiplier_2;
|
||||
int output_shift_2;
|
||||
};
|
||||
|
||||
struct PoolParams {
|
||||
@@ -984,17 +997,17 @@ struct PoolParams {
|
||||
int stride_width;
|
||||
int filter_height;
|
||||
int filter_width;
|
||||
// uint8, etc, activation params.
|
||||
int32 quantized_activation_min;
|
||||
int32 quantized_activation_max;
|
||||
// uint8_t, etc, activation params.
|
||||
int32_t quantized_activation_min;
|
||||
int32_t quantized_activation_max;
|
||||
// float activation params.
|
||||
float float_activation_min;
|
||||
float float_activation_max;
|
||||
};
|
||||
|
||||
struct ReshapeParams {
|
||||
int8 shape_count;
|
||||
int32 shape[4];
|
||||
int8_t shape_count;
|
||||
int32_t shape[4];
|
||||
};
|
||||
|
||||
struct ResizeBilinearParams {
|
||||
@@ -1011,91 +1024,95 @@ struct ResizeNearestNeighborParams {
|
||||
};
|
||||
|
||||
struct SliceParams {
|
||||
int8 begin_count;
|
||||
int32 begin[4];
|
||||
int8 size_count;
|
||||
int32 size[4];
|
||||
int8_t begin_count;
|
||||
int32_t begin[4];
|
||||
int8_t size_count;
|
||||
int32_t size[4];
|
||||
};
|
||||
|
||||
struct SoftmaxParams {
|
||||
// beta is not really used (not a Tensorflow parameter) and not implemented
|
||||
// for LogSoftmax.
|
||||
double beta;
|
||||
// uint8 inference params. Used even when beta defaults to 1.0.
|
||||
int32 input_multiplier;
|
||||
int32 input_left_shift;
|
||||
// uint8_t inference params. Used even when beta defaults to 1.0.
|
||||
int32_t input_multiplier;
|
||||
int32_t input_left_shift;
|
||||
// Reverse scaling is only used by LogSoftmax.
|
||||
int32 reverse_scaling_divisor;
|
||||
int32 reverse_scaling_right_shift;
|
||||
int32_t reverse_scaling_divisor;
|
||||
int32_t reverse_scaling_right_shift;
|
||||
int diff_min;
|
||||
int32_t zero_point;
|
||||
float scale;
|
||||
float* table;
|
||||
// int16 LUT for exp(x), where x uniform distributed between [-10.0 , 0.0]
|
||||
int16_t* exp_lut;
|
||||
// int16 LUT for 1 / (1 + x), where x uniform distributed between [0.0 , 1.0]
|
||||
int16_t* one_over_one_plus_x_lut;
|
||||
uint8_t* uint8_table1;
|
||||
uint8_t* uint8_table2;
|
||||
};
|
||||
|
||||
struct SpaceToBatchParams {
|
||||
// "Zero" padding for uint8 means padding with the output offset.
|
||||
int32 output_offset;
|
||||
// "Zero" padding for uint8_t means padding with the output offset.
|
||||
int32_t output_offset;
|
||||
};
|
||||
|
||||
struct SpaceToDepthParams {
|
||||
int32 block_size;
|
||||
int32_t block_size;
|
||||
};
|
||||
|
||||
struct SplitParams {
|
||||
// Graphs that split into, say, 2000 nodes are encountered. The indices in
|
||||
// OperatorEdges are of type uint16.
|
||||
uint16 num_split;
|
||||
int16 axis;
|
||||
// OperatorEdges are of type uint16_t.
|
||||
uint16_t num_split;
|
||||
int16_t axis;
|
||||
};
|
||||
|
||||
struct SqueezeParams {
|
||||
int8 squeeze_dims_count;
|
||||
int32 squeeze_dims[4];
|
||||
int8_t squeeze_dims_count;
|
||||
int32_t squeeze_dims[4];
|
||||
};
|
||||
|
||||
struct StridedSliceParams {
|
||||
int8 start_indices_count;
|
||||
int32 start_indices[5];
|
||||
int8 stop_indices_count;
|
||||
int32 stop_indices[5];
|
||||
int8 strides_count;
|
||||
int32 strides[5];
|
||||
int8_t start_indices_count;
|
||||
int32_t start_indices[5];
|
||||
int8_t stop_indices_count;
|
||||
int32_t stop_indices[5];
|
||||
int8_t strides_count;
|
||||
int32_t strides[5];
|
||||
|
||||
int16 begin_mask;
|
||||
int16 ellipsis_mask;
|
||||
int16 end_mask;
|
||||
int16 new_axis_mask;
|
||||
int16 shrink_axis_mask;
|
||||
int16_t begin_mask;
|
||||
int16_t ellipsis_mask;
|
||||
int16_t end_mask;
|
||||
int16_t new_axis_mask;
|
||||
int16_t shrink_axis_mask;
|
||||
};
|
||||
|
||||
struct TanhParams {
|
||||
int32 input_zero_point;
|
||||
int32 input_range_radius;
|
||||
int32 input_multiplier;
|
||||
int32_t input_zero_point;
|
||||
int32_t input_range_radius;
|
||||
int32_t input_multiplier;
|
||||
int input_left_shift;
|
||||
};
|
||||
|
||||
struct TransposeParams {
|
||||
int8 perm_count;
|
||||
int32 perm[5];
|
||||
int8_t perm_count;
|
||||
int32_t perm[5];
|
||||
};
|
||||
|
||||
struct UnpackParams {
|
||||
uint16 num_split;
|
||||
int16 axis;
|
||||
uint16_t num_split;
|
||||
int16_t axis;
|
||||
};
|
||||
|
||||
struct LeakyReluParams {
|
||||
float alpha;
|
||||
int32 input_offset;
|
||||
int32 output_offset;
|
||||
int32 output_multiplier_alpha;
|
||||
int32 output_shift_alpha;
|
||||
int32 output_multiplier_identity;
|
||||
int32 output_shift_identity;
|
||||
int32_t input_offset;
|
||||
int32_t output_offset;
|
||||
int32_t output_multiplier_alpha;
|
||||
int32_t output_shift_alpha;
|
||||
int32_t output_multiplier_identity;
|
||||
int32_t output_shift_identity;
|
||||
};
|
||||
|
||||
template <typename P>
|
||||
@@ -1105,13 +1122,19 @@ inline void SetActivationParams(float min, float max, P* params) {
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void SetActivationParams(int32 min, int32 max, P* params) {
|
||||
inline void SetActivationParams(int32_t min, int32_t max, P* params) {
|
||||
params->quantized_activation_min = min;
|
||||
params->quantized_activation_max = max;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void GetActivationParams(const P& params, int32* min, int32* max) {
|
||||
inline void SetActivationParams(int64_t min, int64_t max, P* params) {
|
||||
params->int64_activation_min = min;
|
||||
params->int64_activation_max = max;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void GetActivationParams(const P& params, int32_t* min, int32_t* max) {
|
||||
*min = params.quantized_activation_min;
|
||||
*max = params.quantized_activation_max;
|
||||
}
|
||||
@@ -1122,6 +1145,11 @@ inline void GetActivationParams(const P& params, float* min, float* max) {
|
||||
*max = params.float_activation_max;
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
inline void GetActivationParams(const P& params, int64_t* min, int64_t* max) {
|
||||
*min = params.int64_activation_min;
|
||||
*max = params.int64_activation_max;
|
||||
}
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_
|
||||
|
||||
@@ -14,15 +14,176 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace {
|
||||
|
||||
// Assumes tensor_index is a valid index (in bounds)
|
||||
inline TfLiteTensor* GetTensorAtIndex(const TfLiteContext* context,
|
||||
int tensor_index) {
|
||||
if (context->tensors != nullptr) {
|
||||
return &context->tensors[tensor_index];
|
||||
} else {
|
||||
return context->GetTensor(context, tensor_index);
|
||||
}
|
||||
}
|
||||
|
||||
// Validate in a single place to reduce binary size
|
||||
inline TfLiteStatus ValidateTensorIndexingSafe(const TfLiteContext* context,
|
||||
int index, int max_size,
|
||||
const int* tensor_indices,
|
||||
int* tensor_index) {
|
||||
if (index < 0 || index >= max_size) {
|
||||
TF_LITE_KERNEL_LOG(const_cast<TfLiteContext*>(context),
|
||||
"Invalid tensor index %d (not in [0, %d))\n", index,
|
||||
max_size);
|
||||
return kTfLiteError;
|
||||
}
|
||||
if (tensor_indices[index] == kTfLiteOptionalTensor) {
|
||||
TF_LITE_KERNEL_LOG(const_cast<TfLiteContext*>(context),
|
||||
"Tensor at index %d was optional but was expected\n",
|
||||
index);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
*tensor_index = tensor_indices[index];
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Same as above but returns -1 for invalid inputs instead of status + logging
|
||||
// error.
|
||||
inline int ValidateTensorIndexing(const TfLiteContext* context, int index,
|
||||
int max_size, const int* tensor_indices) {
|
||||
if (index >= 0 && index < max_size) {
|
||||
const int tensor_index = tensor_indices[index];
|
||||
if (tensor_index != kTfLiteOptionalTensor) {
|
||||
return tensor_index;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline TfLiteTensor* GetMutableInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
const int tensor_index = ValidateTensorIndexing(
|
||||
context, index, node->inputs->size, node->inputs->data);
|
||||
if (tensor_index < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return GetTensorAtIndex(context, tensor_index);
|
||||
}
|
||||
|
||||
inline TfLiteStatus GetMutableInputSafe(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index,
|
||||
const TfLiteTensor** tensor) {
|
||||
int tensor_index;
|
||||
TF_LITE_ENSURE_OK(
|
||||
context, ValidateTensorIndexingSafe(context, index, node->inputs->size,
|
||||
node->inputs->data, &tensor_index));
|
||||
*tensor = GetTensorAtIndex(context, tensor_index);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // anonymous namespace.
|
||||
|
||||
const TfLiteTensor* GetInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return GetMutableInput(context, node, index);
|
||||
}
|
||||
|
||||
TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node,
|
||||
int index, const TfLiteTensor** tensor) {
|
||||
return GetMutableInputSafe(context, node, index, tensor);
|
||||
}
|
||||
|
||||
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index) {
|
||||
TfLiteTensor* tensor = GetMutableInput(context, node, index);
|
||||
return tensor->is_variable ? tensor : nullptr;
|
||||
}
|
||||
|
||||
TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index) {
|
||||
const int tensor_index = ValidateTensorIndexing(
|
||||
context, index, node->outputs->size, node->outputs->data);
|
||||
if (tensor_index < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return GetTensorAtIndex(context, tensor_index);
|
||||
}
|
||||
|
||||
TfLiteStatus GetOutputSafe(const TfLiteContext* context, const TfLiteNode* node,
|
||||
int index, TfLiteTensor** tensor) {
|
||||
int tensor_index;
|
||||
TF_LITE_ENSURE_OK(
|
||||
context, ValidateTensorIndexingSafe(context, index, node->outputs->size,
|
||||
node->outputs->data, &tensor_index));
|
||||
*tensor = GetTensorAtIndex(context, tensor_index);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return GetInput(context, node, index);
|
||||
}
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index) {
|
||||
const int tensor_index = ValidateTensorIndexing(
|
||||
context, index, node->temporaries->size, node->temporaries->data);
|
||||
if (tensor_index < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return GetTensorAtIndex(context, tensor_index);
|
||||
}
|
||||
|
||||
TfLiteStatus GetTemporarySafe(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index,
|
||||
TfLiteTensor** tensor) {
|
||||
int tensor_index;
|
||||
TF_LITE_ENSURE_OK(context, ValidateTensorIndexingSafe(
|
||||
context, index, node->temporaries->size,
|
||||
node->temporaries->data, &tensor_index));
|
||||
*tensor = GetTensorAtIndex(context, tensor_index);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
const TfLiteTensor* GetIntermediates(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
const int tensor_index = ValidateTensorIndexing(
|
||||
context, index, node->intermediates->size, node->intermediates->data);
|
||||
if (tensor_index < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return GetTensorAtIndex(context, tensor_index);
|
||||
}
|
||||
|
||||
TfLiteStatus GetIntermediatesSafe(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index,
|
||||
TfLiteTensor** tensor) {
|
||||
int tensor_index;
|
||||
TF_LITE_ENSURE_OK(context, ValidateTensorIndexingSafe(
|
||||
context, index, node->intermediates->size,
|
||||
node->intermediates->data, &tensor_index));
|
||||
*tensor = GetTensorAtIndex(context, tensor_index);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
// Per-axis
|
||||
TfLiteStatus PopulateConvolutionQuantizationParams(
|
||||
TfLiteContext* context, const TfLiteTensor* input,
|
||||
@@ -126,11 +287,27 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
|
||||
// pipeline.
|
||||
if (bias) {
|
||||
const double bias_scale = static_cast<double>(bias->params.scale);
|
||||
// Here we're making sure the input_product_scale & bias_scale the same.
|
||||
// Normally this should be guaranteed by the training pipeline, we are
|
||||
// setting the threshold to be 2e-6 to allow some numeric stability
|
||||
// difference.
|
||||
TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <= 2e-6);
|
||||
// Here we're making sure the input_product_scale & bias_scale are about the
|
||||
// same. Since we have:
|
||||
// (output - output_zp) * output_scale =
|
||||
// input_product_scale * input_product + bias * bias_scale ---- (0)
|
||||
//
|
||||
// (0) equals:
|
||||
// (input_product + bias) * input_product_scale ----- (1)
|
||||
// +
|
||||
// bias * (bias_scale - input_product_scale) ------ (2)
|
||||
//
|
||||
// For the real kernel computation, we're doing (1), so we really need to
|
||||
// make sure (2) has minimum impact on the output, so:
|
||||
// bias * (bias_scale - input_product_scale) / output_scale should be
|
||||
// a small number for an integer.
|
||||
// Since normally bias should be within a small range.
|
||||
// We should expect (bias_scale - input_product_scale) / output_scale to
|
||||
// be a small number like 0.02.
|
||||
const double scale_diff = std::abs(input_product_scale - bias_scale);
|
||||
const double output_scale = static_cast<double>(output->params.scale);
|
||||
|
||||
TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02);
|
||||
}
|
||||
return GetQuantizedConvolutionMultipler(context, input, filter, output,
|
||||
multiplier);
|
||||
@@ -167,7 +344,7 @@ void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
|
||||
} else if (activation == kTfLiteActRelu6) {
|
||||
*act_min = std::max(qmin, quantize(0.0));
|
||||
*act_max = std::min(qmax, quantize(6.0));
|
||||
} else if (activation == kTfLiteActRelu1) {
|
||||
} else if (activation == kTfLiteActReluN1To1) {
|
||||
*act_min = std::max(qmin, quantize(-1.0));
|
||||
*act_max = std::min(qmax, quantize(1.0));
|
||||
} else {
|
||||
@@ -258,4 +435,44 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
|
||||
}
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
// Size of string is not constant, return 0 in such case.
|
||||
int TfLiteTypeGetSize(TfLiteType type) {
|
||||
switch (type) {
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_ASSERT_EQ(sizeof(uint8_t), 1);
|
||||
return 1;
|
||||
case kTfLiteInt8:
|
||||
TF_LITE_ASSERT_EQ(sizeof(int8_t), 1);
|
||||
return 1;
|
||||
case kTfLiteBool:
|
||||
return sizeof(bool);
|
||||
case kTfLiteInt16:
|
||||
TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
|
||||
return 2;
|
||||
case kTfLiteFloat16:
|
||||
TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
|
||||
return 2;
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_ASSERT_EQ(sizeof(float), 4);
|
||||
return 4;
|
||||
case kTfLiteInt32:
|
||||
TF_LITE_ASSERT_EQ(sizeof(int32_t), 4);
|
||||
return 4;
|
||||
case kTfLiteInt64:
|
||||
TF_LITE_ASSERT_EQ(sizeof(int64_t), 8);
|
||||
return 8;
|
||||
case kTfLiteFloat64:
|
||||
TF_LITE_ASSERT_EQ(sizeof(double), 8);
|
||||
return 8;
|
||||
case kTfLiteComplex64:
|
||||
TF_LITE_ASSERT_EQ(sizeof(std::complex<float>), 8);
|
||||
return 8;
|
||||
case kTfLiteComplex128:
|
||||
TF_LITE_ASSERT_EQ(sizeof(std::complex<double>), 16);
|
||||
return 16;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
@@ -15,52 +15,148 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "flatbuffers/flatbuffers.h"
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
// A fair number of functions in this header have historically been inline.
|
||||
// It is ok to change functions to not be inline if the latency with
|
||||
// benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is
|
||||
// made, move the newly non-inlined function declarations to the top of this
|
||||
// header file.
|
||||
|
||||
// Note: You must check if result is not null:
|
||||
//
|
||||
// TfLiteTensor* my_tensor = GetInput(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
//
|
||||
// This is because the index might point to the optional tensor constant
|
||||
// (kTfLiteOptionalTensor) in which case there is no tensor to return.
|
||||
const TfLiteTensor* GetInput(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index);
|
||||
|
||||
// Same as `GetInput` but returns boolean and uses output argument for tensor.
|
||||
//
|
||||
// TfLiteTensor* my_tensor;
|
||||
// TF_LITE_ENSURE_OK(context,
|
||||
// GetInputSafe(context, node, kMyTensorIdx, &my_tensor));
|
||||
// // can use my_tensor directly from here onwards, it is not nullptr
|
||||
//
|
||||
// Should be used in cases where the binary size is too large.
|
||||
TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node,
|
||||
int index, const TfLiteTensor** tensor);
|
||||
|
||||
// Note: You must check if result is not null:
|
||||
//
|
||||
// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
//
|
||||
// This is because the index might point to the optional tensor constant
|
||||
// (kTfLiteOptionalTensor) in which case there is no tensor to return.
|
||||
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index);
|
||||
|
||||
// Note: You must check if result is not null:
|
||||
//
|
||||
// TfLiteTensor* my_tensor = GetOutput(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
//
|
||||
// This is because the index might point to the optional tensor constant
|
||||
// (kTfLiteOptionalTensor) in which case there is no tensor to return.
|
||||
TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index);
|
||||
|
||||
// Same as `GetOutput` but returns boolean and uses output argument for tensor.
|
||||
//
|
||||
// TfLiteTensor* my_tensor;
|
||||
// TF_LITE_ENSURE_OK(context,
|
||||
// GetOutputSafe(context, node, kMyTensorIdx, &my_tensor));
|
||||
// // can use my_tensor directly from here onwards, it is not nullptr
|
||||
//
|
||||
// Should be used in cases where the binary size is too large.
|
||||
TfLiteStatus GetOutputSafe(const TfLiteContext* context, const TfLiteNode* node,
|
||||
int index, TfLiteTensor** tensor);
|
||||
|
||||
// Note: You must check if result is not null:
|
||||
//
|
||||
// TfLiteTensor* my_tensor = GetOptionalInputTensor(context, node, kIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
//
|
||||
// This is because the index might point to the optional tensor constant
|
||||
// (kTfLiteOptionalTensor) in which case there is no tensor to return.
|
||||
//
|
||||
// Deprecated. GetInput has the same functionality.
|
||||
const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index);
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
// Note: You must check if result is not null:
|
||||
//
|
||||
// TfLiteTensor* my_tensor = GetTemporary(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
//
|
||||
// This is because the index might point to the optional tensor constant
|
||||
// (kTfLiteOptionalTensor) in which case there is no tensor to return.
|
||||
TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index);
|
||||
|
||||
// Same as `GetTemporary` but returns boolean and uses output argument for
|
||||
// tensor.
|
||||
//
|
||||
// TfLiteTensor* my_tensor;
|
||||
// TF_LITE_ENSURE_OK(context,
|
||||
// GetTemporarySafe(context, node, kMyTensorIdx,
|
||||
// &my_tensor));
|
||||
// // can use my_tensor directly from here onwards, it is not nullptr
|
||||
//
|
||||
// Should be used in cases where the binary size is too large.
|
||||
TfLiteStatus GetTemporarySafe(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index,
|
||||
TfLiteTensor** tensor);
|
||||
|
||||
// Note: You must check if result is not null:
|
||||
//
|
||||
// TfLiteTensor* my_tensor = GetIntermediates(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
//
|
||||
// This is because the index might point to the optional tensor constant
|
||||
// (kTfLiteOptionalTensor) in which case there is no tensor to return.
|
||||
const TfLiteTensor* GetIntermediates(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index);
|
||||
|
||||
// Same as `GetIntermediates` but returns boolean and uses output argument for
|
||||
// tensor.
|
||||
//
|
||||
// TfLiteTensor* my_tensor;
|
||||
// TF_LITE_ENSURE_OK(context,
|
||||
// GetIntermediatesSafe(context, node, kMyTensorIdx,
|
||||
// &my_tensor));
|
||||
// // can use my_tensor directly from here onwards, it is not nullptr
|
||||
//
|
||||
// Should be used in cases where the binary size is too large.
|
||||
TfLiteStatus GetIntermediatesSafe(const TfLiteContext* context,
|
||||
const TfLiteNode* node, int index,
|
||||
TfLiteTensor** tensor);
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; }
|
||||
inline int SizeOfDimension(const TfLiteTensor* t, int dim) {
|
||||
return t->dims->data[dim];
|
||||
}
|
||||
inline const TfLiteTensor* GetInput(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
}
|
||||
// Note: You must check if result is not null:
|
||||
// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
|
||||
// TF_LITE_ENSURE(context, my_tensor != nullptr);
|
||||
inline TfLiteTensor* GetVariableInput(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
TfLiteTensor* tensor =
|
||||
&context->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
return (tensor->is_variable) ? tensor : nullptr;
|
||||
}
|
||||
inline TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
|
||||
int index) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->outputs->data[index])];
|
||||
}
|
||||
inline TfLiteTensor* GetTemporary(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context->tensors[flatbuffers::EndianScalar(
|
||||
node->temporaries->data[index])];
|
||||
}
|
||||
inline const TfLiteTensor* GetIntermediates(TfLiteContext* context,
|
||||
const TfLiteNode* node, int index) {
|
||||
return &context->tensors[node->intermediates->data[index]];
|
||||
}
|
||||
|
||||
inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; }
|
||||
inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; }
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
inline int NumIntermediates(const TfLiteNode* node) {
|
||||
return node->intermediates->size;
|
||||
}
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
inline int64_t NumElements(const TfLiteIntArray* dims) {
|
||||
int64_t count = 1;
|
||||
@@ -74,19 +170,11 @@ inline int64_t NumElements(const TfLiteTensor* t) {
|
||||
return NumElements(t->dims);
|
||||
}
|
||||
|
||||
inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context,
|
||||
const TfLiteNode* node,
|
||||
int index) {
|
||||
const bool use_tensor = index < node->inputs->size &&
|
||||
node->inputs->data[index] != kTfLiteOptionalTensor;
|
||||
if (use_tensor) {
|
||||
return &context
|
||||
->tensors[flatbuffers::EndianScalar(node->inputs->data[index])];
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Determines whether tensor is constant.
|
||||
// TODO(b/138199592): Introduce new query which checks for constant OR
|
||||
// persistent-read-only, which would be useful for most tensor kernels that
|
||||
// are potentially dynamic based on the input tensor value availability at the
|
||||
// time of prepare.
|
||||
inline bool IsConstantTensor(const TfLiteTensor* tensor) {
|
||||
return tensor->allocation_type == kTfLiteMmapRo;
|
||||
}
|
||||
@@ -105,6 +193,14 @@ inline void SetTensorToDynamic(TfLiteTensor* tensor) {
|
||||
}
|
||||
}
|
||||
|
||||
// Sets tensor to persistent and read-only.
|
||||
inline void SetTensorToPersistentRo(TfLiteTensor* tensor) {
|
||||
if (tensor->allocation_type != kTfLitePersistentRo) {
|
||||
tensor->allocation_type = kTfLitePersistentRo;
|
||||
tensor->data.raw = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Determines whether it is a hybrid op - one that has float inputs and
|
||||
// quantized weights.
|
||||
inline bool IsHybridOp(const TfLiteTensor* input, const TfLiteTensor* weight) {
|
||||
@@ -162,7 +258,7 @@ void CalculateActivationRange(TfLiteFusedActivation activation,
|
||||
} else if (activation == kTfLiteActRelu6) {
|
||||
*activation_min = 0;
|
||||
*activation_max = 6;
|
||||
} else if (activation == kTfLiteActRelu1) {
|
||||
} else if (activation == kTfLiteActReluN1To1) {
|
||||
*activation_min = -1;
|
||||
*activation_max = 1;
|
||||
} else {
|
||||
@@ -188,6 +284,10 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
|
||||
const TfLiteTensor* input2,
|
||||
const TfLiteTensor* input3,
|
||||
TfLiteIntArray** output_shape);
|
||||
|
||||
// Return the size of given type in bytes. Return 0 in in case of string.
|
||||
int TfLiteTypeGetSize(TfLiteType type);
|
||||
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
|
||||
|
||||
@@ -19,7 +19,7 @@ limitations under the License.
|
||||
// non-portable function.
|
||||
#ifdef TF_LITE_MCU_DEBUG_LOG
|
||||
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
#include "tensorflow/lite/micro/debug_log.h"
|
||||
|
||||
#define DEBUG_LOG(x) \
|
||||
do { \
|
||||
@@ -36,7 +36,6 @@ inline void InfiniteLoop() {
|
||||
|
||||
#else // TF_LITE_MCU_DEBUG_LOG
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
@@ -45,6 +44,15 @@ inline void InfiniteLoop() {
|
||||
fprintf(stderr, "%s", (x)); \
|
||||
} while (0)
|
||||
|
||||
// Report Error for unsupported type by op 'op_name' and returns kTfLiteError.
|
||||
#define TF_LITE_UNSUPPORTED_TYPE(context, type, op_name) \
|
||||
do { \
|
||||
TF_LITE_KERNEL_LOG((context), "%s:%d Type %s is unsupported by op %s.", \
|
||||
__FILE__, __LINE__, TfLiteTypeGetName(type), \
|
||||
(op_name)); \
|
||||
return kTfLiteError; \
|
||||
} while (0)
|
||||
|
||||
#define TFLITE_ABORT abort()
|
||||
|
||||
#endif // TF_LITE_MCU_DEBUG_LOG
|
||||
|
||||
94
code/lib/tfmicro/tensorflow/lite/micro/all_ops_resolver.cc
Normal file
94
code/lib/tfmicro/tensorflow/lite/micro/all_ops_resolver.cc
Normal file
@@ -0,0 +1,94 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/all_ops_resolver.h"
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/micro_ops.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace custom {
|
||||
TfLiteRegistration* Register_ETHOSU();
|
||||
const char* GetString_ETHOSU();
|
||||
} // namespace custom
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
AllOpsResolver::AllOpsResolver() {
|
||||
// Please keep this list of Builtin Operators in alphabetical order.
|
||||
AddAbs();
|
||||
AddAdd();
|
||||
AddArgMax();
|
||||
AddArgMin();
|
||||
AddAveragePool2D();
|
||||
AddCeil();
|
||||
AddConcatenation();
|
||||
AddConv2D();
|
||||
AddCos();
|
||||
AddDepthwiseConv2D();
|
||||
AddDequantize();
|
||||
AddEqual();
|
||||
AddFloor();
|
||||
AddFullyConnected();
|
||||
AddGreater();
|
||||
AddGreaterEqual();
|
||||
AddHardSwish();
|
||||
AddL2Normalization();
|
||||
AddLess();
|
||||
AddLessEqual();
|
||||
AddLog();
|
||||
AddLogicalAnd();
|
||||
AddLogicalNot();
|
||||
AddLogicalOr();
|
||||
AddLogistic();
|
||||
AddMaximum();
|
||||
AddMaxPool2D();
|
||||
AddMean();
|
||||
AddMinimum();
|
||||
AddMul();
|
||||
AddNeg();
|
||||
AddNotEqual();
|
||||
AddPack();
|
||||
AddPad();
|
||||
AddPadV2();
|
||||
AddPrelu();
|
||||
AddQuantize();
|
||||
AddReduceMax();
|
||||
AddRelu();
|
||||
AddRelu6();
|
||||
AddReshape();
|
||||
AddResizeNearestNeighbor();
|
||||
AddRound();
|
||||
AddRsqrt();
|
||||
AddShape();
|
||||
AddSin();
|
||||
AddSoftmax();
|
||||
AddSplit();
|
||||
AddSplitV();
|
||||
AddSqrt();
|
||||
AddSquare();
|
||||
AddStridedSlice();
|
||||
AddSub();
|
||||
AddSvdf();
|
||||
AddTanh();
|
||||
AddUnpack();
|
||||
|
||||
// TODO(b/159644355): Figure out if custom Ops belong in AllOpsResolver.
|
||||
TfLiteRegistration* registration =
|
||||
tflite::ops::micro::custom::Register_ETHOSU();
|
||||
if (registration) {
|
||||
AddCustom(tflite::ops::micro::custom::GetString_ETHOSU(), registration);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tflite
|
||||
@@ -9,17 +9,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
|
||||
#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
|
||||
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
class AllOpsResolver : public MicroMutableOpResolver {
|
||||
// The magic number in the template parameter is the maximum number of ops that
|
||||
// can be added to AllOpsResolver. It can be increased if needed. And most
|
||||
// applications that care about the memory footprint will want to directly use
|
||||
// MicroMutableOpResolver and have an application specific template parameter.
|
||||
// The examples directory has sample code for this.
|
||||
class AllOpsResolver : public MicroMutableOpResolver<128> {
|
||||
public:
|
||||
AllOpsResolver();
|
||||
|
||||
@@ -27,8 +30,6 @@ class AllOpsResolver : public MicroMutableOpResolver {
|
||||
TF_LITE_REMOVE_VIRTUAL_DELETE
|
||||
};
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ALL_OPS_RESOLVER_H_
|
||||
#endif // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,22 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
|
||||
#define TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
|
||||
|
||||
extern const unsigned char g_keyword_scrambled_model_data[];
|
||||
extern const unsigned int g_keyword_scrambled_model_data_length;
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_BENCHMARKS_KEYWORD_SCRAMBLED_MODEL_DATA_H_
|
||||
@@ -1,4 +1,4 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -36,6 +36,15 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/micro/debug_log.h"
|
||||
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
extern "C" void DebugLog(const char* s) { fprintf(stderr, "%s", s); }
|
||||
extern "C" void DebugLog(const char* s) {
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
// Reusing TF_LITE_STRIP_ERROR_STRINGS to disable DebugLog completely to get
|
||||
// maximum reduction in binary size. This is because we have DebugLog calls
|
||||
// via TF_LITE_CHECK that are not stubbed out by TF_LITE_REPORT_ERROR.
|
||||
fprintf(stderr, "%s", s);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -15,9 +15,17 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
||||
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
// This function should be implemented by each target platform, and provide a
|
||||
// way for strings to be output to some text stream. For more information, see
|
||||
// tensorflow/lite/micro/debug_log.cc.
|
||||
extern "C" void DebugLog(const char* s);
|
||||
void DebugLog(const char* s);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
|
||||
|
||||
@@ -21,6 +21,8 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/max.h"
|
||||
#include "tensorflow/lite/kernels/internal/min.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -32,11 +34,11 @@ inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
|
||||
case kTfLiteActNone:
|
||||
return a;
|
||||
case kTfLiteActRelu:
|
||||
return std::max(0.0f, a);
|
||||
case kTfLiteActRelu1:
|
||||
return std::max(-1.0f, std::min(a, 1.0f));
|
||||
return TfLiteMax(0.0f, a);
|
||||
case kTfLiteActReluN1To1:
|
||||
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
|
||||
case kTfLiteActRelu6:
|
||||
return std::max(0.0f, std::min(a, 6.0f));
|
||||
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
|
||||
case kTfLiteActTanh:
|
||||
return std::tanh(a);
|
||||
case kTfLiteActSignBit:
|
||||
|
||||
@@ -18,30 +18,82 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
struct ReluOpData {
|
||||
ReluParams params;
|
||||
};
|
||||
|
||||
struct Relu6OpData {
|
||||
int8_t six_int8;
|
||||
int8_t zero_int8;
|
||||
uint8_t six_uint8;
|
||||
uint8_t zero_uint8;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename Q>
|
||||
inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape,
|
||||
const Q* input_data, const RuntimeShape& output_shape,
|
||||
Q* output_data) {
|
||||
template <typename T>
|
||||
inline void ReluQuantized(const ReluOpData& data,
|
||||
const RuntimeShape& input_shape,
|
||||
const RuntimeShape& output_shape, const T* input_data,
|
||||
T* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
const Q val = input_data[i];
|
||||
const Q clamped = val < lower ? lower : val;
|
||||
output_data[i] = clamped;
|
||||
const int32_t val = static_cast<int32_t>(input_data[i]);
|
||||
int32_t clamped =
|
||||
data.params.output_offset +
|
||||
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
|
||||
data.params.output_multiplier,
|
||||
data.params.output_shift);
|
||||
clamped = std::max(data.params.quantized_activation_min, clamped);
|
||||
clamped = std::min(data.params.quantized_activation_max, clamped);
|
||||
output_data[i] = static_cast<T>(clamped);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
ReluOpData* data) {
|
||||
float act_min = 0.0;
|
||||
float act_max = std::numeric_limits<float>::infinity();
|
||||
double real_multiplier =
|
||||
static_cast<double>(input->params.scale / output->params.scale);
|
||||
|
||||
const RuntimeShape input_shape = GetTensorShape(input);
|
||||
const RuntimeShape output_shape = GetTensorShape(output);
|
||||
|
||||
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
|
||||
&data->params.output_shift);
|
||||
|
||||
data->params.quantized_activation_min = std::max(
|
||||
static_cast<int32_t>(std::numeric_limits<T>::min()),
|
||||
output->params.zero_point +
|
||||
static_cast<int32_t>(roundf(act_min / output->params.scale)));
|
||||
data->params.quantized_activation_max =
|
||||
act_max == std::numeric_limits<float>::infinity()
|
||||
? static_cast<int32_t>(std::numeric_limits<T>::max())
|
||||
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
|
||||
output->params.zero_point +
|
||||
static_cast<int32_t>(
|
||||
roundf(act_max / output->params.scale)));
|
||||
data->params.input_offset = input->params.zero_point;
|
||||
data->params.output_offset = output->params.zero_point;
|
||||
}
|
||||
|
||||
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
|
||||
const RuntimeShape& output_shape, float* output_data) {
|
||||
const int flat_size = MatchingFlatSize(input_shape, output_shape);
|
||||
@@ -77,33 +129,59 @@ inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
|
||||
}
|
||||
}
|
||||
|
||||
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
|
||||
}
|
||||
|
||||
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
CalculateReluOpData<int8_t>(input, output, data);
|
||||
} else if (input->type == kTfLiteUInt8) {
|
||||
CalculateReluOpData<uint8_t>(input, output, data);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
ReluFloat(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
ReluQuantized<int8_t>(input->params.zero_point, GetTensorShape(input),
|
||||
GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output),
|
||||
GetTensorData<int8_t>(output));
|
||||
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
ReluQuantized<uint8_t>(input->params.zero_point, GetTensorShape(input),
|
||||
GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output),
|
||||
GetTensorData<uint8_t>(output));
|
||||
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default: {
|
||||
@@ -114,37 +192,63 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
}
|
||||
|
||||
void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
|
||||
input->params.zero_point);
|
||||
data->zero_int8 = input->params.zero_point;
|
||||
} else if (input->type == kTfLiteUInt8) {
|
||||
data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
|
||||
input->params.zero_point);
|
||||
data->zero_uint8 = input->params.zero_point;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
Relu6Float(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8: {
|
||||
const int8_t six = FloatToAsymmetricQuantizedInt8(
|
||||
6.0f, input->params.scale, input->params.zero_point);
|
||||
const int8_t zero = input->params.zero_point;
|
||||
Relu6Quantized<int8_t>(
|
||||
zero, six, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteUInt8: {
|
||||
const uint8_t six = FloatToAsymmetricQuantizedUInt8(
|
||||
6.0f, input->params.scale, input->params.zero_point);
|
||||
const uint8_t zero = input->params.zero_point;
|
||||
Relu6Quantized<uint8_t>(
|
||||
zero, six, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default: {
|
||||
@@ -157,28 +261,26 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_RELU() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::ReluPrepare,
|
||||
/*invoke=*/activations::ReluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RELU() {
|
||||
return {/*init=*/activations::ReluInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::ReluPrepare,
|
||||
/*invoke=*/activations::ReluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_RELU6() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::Relu6Prepare,
|
||||
/*invoke=*/activations::Relu6Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RELU6() {
|
||||
return {/*init=*/activations::Relu6Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::Relu6Prepare,
|
||||
/*invoke=*/activations::Relu6Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -23,6 +23,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -40,18 +42,22 @@ struct OpData {
|
||||
// and the special 16-bit -> 16bit quantized path
|
||||
int input1_shift;
|
||||
int input2_shift;
|
||||
int32 output_activation_min;
|
||||
int32 output_activation_max;
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
// These fields are used only in the general 8-bit -> 8bit quantized path
|
||||
int32 input1_multiplier;
|
||||
int32 input2_multiplier;
|
||||
int32 output_multiplier;
|
||||
int32_t input1_multiplier;
|
||||
int32_t input2_multiplier;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
int left_shift;
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
int32_t input1_offset;
|
||||
int32_t input2_offset;
|
||||
int32_t output_offset;
|
||||
|
||||
// Used only for float evals:
|
||||
float output_activation_min_f32;
|
||||
float output_activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
|
||||
@@ -89,37 +95,44 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
} else if (output->type == kTfLiteFloat32) {
|
||||
CalculateActivationRange(params->activation,
|
||||
&data->output_activation_min_f32,
|
||||
&data->output_activation_max_f32);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
|
||||
const OpData* data, const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
const OpData* data, const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
#define TF_LITE_ADD(opname) \
|
||||
reference_ops::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<float>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<float>(input2), GetTensorShape(output), \
|
||||
GetTensorData<float>(output))
|
||||
SetActivationParams(data->output_activation_min_f32,
|
||||
data->output_activation_max_f32, &op_params);
|
||||
if (data->requires_broadcast) {
|
||||
TF_LITE_ADD(BroadcastAdd4DSlow);
|
||||
reference_ops::BroadcastAdd4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_ADD(Add);
|
||||
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteAddParams* params, const OpData* data,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
@@ -135,46 +148,91 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_ADD(type, opname, dtype) \
|
||||
type::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<dtype>(output));
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
|
||||
reference_integer_ops::BroadcastAdd4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_ADD(reference_integer_ops, Add, int8_t);
|
||||
reference_integer_ops::Add(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
|
||||
reference_ops::BroadcastAdd4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
TF_LITE_ADD(reference_ops, Add, uint8_t);
|
||||
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
TF_LITE_ENSURE(context, input1 != nullptr);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TF_LITE_ENSURE(context, input2 != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, data));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
OpData data;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateOpData(context, params, input1, input2, output, &data));
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
EvalAdd(context, node, params, &data, input1, input2, output);
|
||||
EvalAdd(context, node, params, data, input1, input2, output);
|
||||
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, &data,
|
||||
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
|
||||
input1, input2, output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -187,16 +245,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace add
|
||||
|
||||
TfLiteRegistration* Register_ADD() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/add::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ADD() {
|
||||
return {/*init=*/add::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/add::Prepare,
|
||||
/*invoke=*/add::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/micro_ops.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
|
||||
// Register each supported op with:
|
||||
// AddBuiltin(<operator ID>, <registration>, [min version], [max version])
|
||||
AllOpsResolver::AllOpsResolver() {
|
||||
AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(), 1, 4);
|
||||
AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_2D(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(), 1,
|
||||
3);
|
||||
AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_2D(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_ABS, Register_ABS());
|
||||
AddBuiltin(BuiltinOperator_SIN, Register_SIN());
|
||||
AddBuiltin(BuiltinOperator_COS, Register_COS());
|
||||
AddBuiltin(BuiltinOperator_LOG, Register_LOG());
|
||||
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
|
||||
AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
|
||||
AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
|
||||
AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
|
||||
AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
|
||||
AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
|
||||
AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
|
||||
AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
|
||||
AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
|
||||
AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
|
||||
AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
|
||||
AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LESS, Register_LESS(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_CEIL, Register_CEIL());
|
||||
AddBuiltin(BuiltinOperator_ROUND, Register_ROUND());
|
||||
AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
|
||||
AddBuiltin(BuiltinOperator_PACK, Register_PACK(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_PAD, Register_PAD(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_NEG, Register_NEG());
|
||||
AddBuiltin(BuiltinOperator_ADD, Register_ADD(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_MUL, Register_MUL(), 1, 3);
|
||||
AddBuiltin(BuiltinOperator_SUB, Register_SUB(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_QUANTIZE, Register_QUANTIZE());
|
||||
AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(), 1, 2);
|
||||
AddBuiltin(BuiltinOperator_RELU, Register_RELU());
|
||||
AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
|
||||
AddBuiltin(BuiltinOperator_MEAN, Register_MEAN());
|
||||
AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
|
||||
Register_RESIZE_NEAREST_NEIGHBOR(),
|
||||
/* min_version = */ 1,
|
||||
/* max_version = */ 2);
|
||||
AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2_NORMALIZATION());
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
@@ -19,6 +19,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
@@ -45,14 +46,20 @@ inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* axis = GetInput(context, node, kAxis);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* axis =
|
||||
tflite::micro::GetEvalInput(context, node, kAxis);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
|
||||
ArgMinMaxHelper(GetTensorShape(input), GetTensorData<data_type>(input), \
|
||||
GetTensorData<axis_type>(axis), GetTensorShape(output), \
|
||||
GetTensorData<output_type>(output), is_arg_max)
|
||||
#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \
|
||||
ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \
|
||||
tflite::micro::GetTensorData<data_type>(input), \
|
||||
tflite::micro::GetTensorData<axis_type>(axis), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<output_type>(output), \
|
||||
is_arg_max)
|
||||
if (axis->type == kTfLiteInt32) {
|
||||
if (output->type == kTfLiteInt32) {
|
||||
switch (input->type) {
|
||||
@@ -67,18 +74,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only float32, uint8 and int8 are "
|
||||
"Only float32, uint8_t and int8_t are "
|
||||
"supported currently, got %s.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only int32_t are supported currently, got %s.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.",
|
||||
TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
|
||||
TfLiteTypeGetName(axis->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
@@ -98,28 +106,26 @@ TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace arg_min_max
|
||||
|
||||
TfLiteRegistration* Register_ARG_MAX() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ARG_MAX() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_ARG_MIN() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ARG_MIN() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/arg_min_max::ArgMinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -18,6 +18,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -29,11 +30,13 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
|
||||
for (int i = 0; i < output->dims->size; ++i) {
|
||||
@@ -43,26 +46,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
reference_ops::Ceil(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Ceil(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace ceil
|
||||
|
||||
TfLiteRegistration* Register_CEIL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/ceil::Prepare,
|
||||
/*invoke=*/ceil::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_CEIL() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/ceil::Prepare,
|
||||
/*invoke=*/ceil::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -17,11 +17,10 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
/*
|
||||
* The circular buffer custom operator is used to implement strided streaming
|
||||
@@ -78,7 +77,9 @@ void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
@@ -89,10 +90,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[2]);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
// The circular buffer custom operator currently only supports int8.
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
|
||||
// The circular buffer custom operator currently only supports int8_t.
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
|
||||
// TODO(b/132070898): Use statically slotted OpData structures until a
|
||||
// scratch memory API is ready.
|
||||
@@ -121,8 +122,10 @@ void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
OpData* data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
@@ -130,8 +133,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
int depth = output->dims->data[3];
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
EvalInt8(GetTensorData<int8_t>(input), num_slots, depth,
|
||||
GetTensorData<int8_t>(output));
|
||||
EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
|
||||
@@ -18,6 +18,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -25,103 +26,109 @@ namespace micro {
|
||||
namespace comparisons {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
ComparisonParams params;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor1 = 0;
|
||||
constexpr int kInputTensor2 = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
// TODO(ruic): optimize macros below to using template functions.
|
||||
#define TF_LITE_QUANTIZE_COMPARISON(opname) \
|
||||
template <typename input_dtype> \
|
||||
void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \
|
||||
const TfLiteTensor* input1, \
|
||||
const TfLiteTensor* input2, TfLiteTensor* output, \
|
||||
bool requires_broadcast) { \
|
||||
if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \
|
||||
auto input1_offset = -input1->params.zero_point; \
|
||||
auto input2_offset = -input2->params.zero_point; \
|
||||
const int left_shift = 8; \
|
||||
\
|
||||
int32 input1_multiplier; \
|
||||
int input1_shift; \
|
||||
QuantizeMultiplierSmallerThanOneExp( \
|
||||
static_cast<double>(input1->params.scale), &input1_multiplier, \
|
          &input1_shift); \
      int32 input2_multiplier; \
      int input2_shift; \
      QuantizeMultiplierSmallerThanOneExp( \
          static_cast<double>(input2->params.scale), &input2_multiplier, \
          &input2_shift); \
                                                                         \
      ComparisonParams op_params; \
      op_params.left_shift = left_shift; \
      op_params.input1_offset = input1_offset; \
      op_params.input1_multiplier = input1_multiplier; \
      op_params.input1_shift = input1_shift; \
      op_params.input2_offset = input2_offset; \
      op_params.input2_multiplier = input2_multiplier; \
      op_params.input2_shift = input2_shift; \
      if (requires_broadcast) { \
        reference_ops::Broadcast4DSlow##opname##WithScaling( \
            op_params, GetTensorShape(input1), \
            GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
            GetTensorData<input_dtype>(input2), GetTensorShape(output), \
            GetTensorData<bool>(output)); \
      } else { \
        reference_ops::opname##WithScaling( \
            op_params, GetTensorShape(input1), \
            GetTensorData<input_dtype>(input1), GetTensorShape(input2), \
            GetTensorData<input_dtype>(input2), GetTensorShape(output), \
            GetTensorData<bool>(output)); \
      } \
    } \
  }
TF_LITE_QUANTIZE_COMPARISON(Equal);
TF_LITE_QUANTIZE_COMPARISON(NotEqual);
TF_LITE_QUANTIZE_COMPARISON(Greater);
TF_LITE_QUANTIZE_COMPARISON(GreaterEqual);
TF_LITE_QUANTIZE_COMPARISON(Less);
TF_LITE_QUANTIZE_COMPARISON(LessEqual);
#undef TF_LITE_QUANTIZE_COMPARISON

#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \
  { \
    ComparisonParams op_params; \
    requires_broadcast \
        ? reference_ops::Broadcast4DSlow##opname##NoScaling( \
              op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
              GetTensorShape(input2), GetTensorData<type>(input2), \
              GetTensorShape(output), GetTensorData<bool>(output)) \
        : reference_ops::opname##NoScaling( \
              op_params, GetTensorShape(input1), GetTensorData<type>(input1), \
              GetTensorShape(input2), GetTensorData<type>(input2), \
              GetTensorShape(output), GetTensorData<bool>(output)); \
  }

TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteBool:
      TF_LITE_COMPARISON(bool, Equal, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<bool>(input1), input2_shape,
                tflite::micro::GetTensorData<bool>(input2), output_shape,
                output_data)
          : reference_ops::EqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<bool>(input1), input2_shape,
                tflite::micro::GetTensorData<bool>(input2), output_shape,
                output_data);
      break;
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, Equal, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::EqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::EqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::EqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedEqual<uint8_t>(context, node, input1, input2, output,
                                  requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::EqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedEqual<int8_t>(context, node, input1, input2, output,
                                 requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::EqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -133,30 +140,100 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {

// TODO(renjieliu): Refactor the logic to avoid duplications.
TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteBool:
      TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<bool>(input1), input2_shape,
                tflite::micro::GetTensorData<bool>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<bool>(input1), input2_shape,
                tflite::micro::GetTensorData<bool>(input2), output_shape,
                output_data);
      break;
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, NotEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedNotEqual<uint8_t>(context, node, input1, input2, output,
                                     requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedNotEqual<int8_t>(context, node, input1, input2, output,
                                    requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowNotEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::NotEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -167,27 +244,87 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, Greater, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::GreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedGreater<uint8_t>(context, node, input1, input2, output,
                                    requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedGreater<int8_t>(context, node, input1, input2, output,
                                   requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -198,27 +335,87 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedGreaterEqual<uint8_t>(context, node, input1, input2, output,
                                         requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedGreaterEqual<int8_t>(context, node, input1, input2, output,
                                        requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::GreaterEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -229,27 +426,87 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, Less, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::LessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, Less, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::LessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, Less, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::LessNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedLess<uint8_t>(context, node, input1, input2, output,
                                 requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::LessWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedLess<int8_t>(context, node, input1, input2, output,
                                requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::LessWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -260,27 +517,87 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  bool requires_broadcast = !HaveSameShapes(input1, input2);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1);
  RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2);
  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  bool* output_data = tflite::micro::GetTensorData<bool>(output);

  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
  switch (input1->type) {
    case kTfLiteFloat32:
      TF_LITE_COMPARISON(float, LessEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<float>(input1), input2_shape,
                tflite::micro::GetTensorData<float>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt32:
      TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int32_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int32_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt64:
      TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualNoScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int64_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteUInt8:
      EvalQuantizedLessEqual<uint8_t>(context, node, input1, input2, output,
                                      requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
                output_data);
      break;
    case kTfLiteInt8:
      EvalQuantizedLessEqual<int8_t>(context, node, input1, input2, output,
                                     requires_broadcast);
      requires_broadcast
          ? reference_ops::Broadcast4DSlowLessEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data)
          : reference_ops::LessEqualWithScaling(
                data->params, input1_shape,
                tflite::micro::GetTensorData<int8_t>(input1), input2_shape,
                tflite::micro::GetTensorData<int8_t>(input2), output_shape,
                output_data);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -291,78 +608,115 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
}

}  // namespace

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
  TF_LITE_ENSURE(context, input1 != nullptr);
  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
  TF_LITE_ENSURE(context, input2 != nullptr);

  if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
    auto input1_offset = -input1->params.zero_point;
    auto input2_offset = -input2->params.zero_point;
    const int kLeftShift = 8;

    int32_t input1_multiplier;
    int input1_shift;
    QuantizeMultiplierSmallerThanOneExp(
        static_cast<double>(input1->params.scale), &input1_multiplier,
        &input1_shift);
    int32_t input2_multiplier;
    int input2_shift;
    QuantizeMultiplierSmallerThanOneExp(
        static_cast<double>(input2->params.scale), &input2_multiplier,
        &input2_shift);

    data->params.left_shift = kLeftShift;
    data->params.input1_offset = input1_offset;
    data->params.input1_multiplier = input1_multiplier;
    data->params.input1_shift = input1_shift;
    data->params.input2_offset = input2_offset;
    data->params.input2_multiplier = input2_multiplier;
    data->params.input2_shift = input2_shift;
  }

  return kTfLiteOk;
}

}  // namespace comparisons

TfLiteRegistration* Register_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::EqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::EqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_NOT_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::NotEqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_NOT_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::NotEqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_GREATER() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::GreaterEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_GREATER() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::GreaterEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_GREATER_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::GreaterEqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_GREATER_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::GreaterEqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_LESS() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::LessEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_LESS() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::LessEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_LESS_EQUAL() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/comparisons::LessEqualEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_LESS_EQUAL() {
  return {/*init=*/comparisons::Init,
          /*free=*/nullptr,
          /*prepare=*/comparisons::Prepare,
          /*invoke=*/comparisons::LessEqualEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro

@@ -18,10 +18,11 @@ limitations under the License.

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -31,14 +32,116 @@ namespace concatenation {
constexpr int kMaxInputNum = 10;  // Maximum number of input tensors
constexpr int kOutputTensor = 0;

struct OpData {
  ConcatenationParams params;
};

// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
  if (axis >= 0) {
    return axis;
  } else {
    return NumDimensions(output_tensor) + axis;
  }
}

// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.

// Gets shapes from a list of tensors.
inline void GetAllInputTensorShapes(const TfLiteContext* context,
                                    const TfLiteNode* node,
                                    RuntimeShape all_shapes[kMaxInputNum]) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(node != nullptr);
  for (int i = 0; i < node->inputs->size; ++i) {
    const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
    RuntimeShape shape = tflite::micro::GetTensorShape(t);
    all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
  }
}

// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
                              const RuntimeShape* pointers[]) {
  for (size_t i = 0; i < num; ++i) {
    pointers[i] = &shapes[i];
  }
}

// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllInputTensorData(const TfLiteContext* context,
                                  const TfLiteNode* node,
                                  T* all_data[kMaxInputNum]) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(node != nullptr);
  for (int i = 0; i < node->inputs->size; ++i) {
    const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
    all_data[i] = tflite::micro::GetTensorData<T>(t);
  }
}

template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointer of input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const data_type* inputs_data[kMaxInputNum];
  GetAllInputTensorShapes(context, node, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllInputTensorData(context, node, inputs_data);

  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data,
                               tflite::micro::GetTensorShape(output),
                               tflite::micro::GetTensorData<data_type>(output));
}

void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointer of input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const uint8_t* inputs_data[kMaxInputNum];
  GetAllInputTensorShapes(context, node, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllInputTensorData(context, node, inputs_data);

  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);

  reference_ops::ConcatenationWithScaling(
      data->params, inputs_shape_ptr, inputs_data,
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<uint8_t>(output));
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // This function only checks the types. Additional shape validations are
  // performed in the reference implementation called during Eval().
  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  TfLiteType input_type = GetInput(context, node, 0)->type;
  TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
  const TfLiteTensor* input_tensor = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, input_tensor != nullptr);
  TfLiteType input_type = input_tensor->type;
  const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output_tensor != nullptr);
  TfLiteType output_type = output_tensor->type;

  // Check activation and input type
  TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
@@ -57,133 +160,76 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // Shapes with dimensions >4 are not yet supported with static allocation.
  for (int i = 0; i < num_inputs; ++i) {
    const TfLiteTensor* input = GetInput(context, node, i);
    TF_LITE_ENSURE(context, input != nullptr);
    int num_dimensions = NumDimensions(input);

    if (num_dimensions > 4) {
      TF_LITE_KERNEL_LOG(
          context,
          "Op Concatenation does not currently support num dimensions >4 "
          "Tensor '%s' has %d dimensions.",
          input->name, num_dimensions);
          "Tensor has %d dimensions.",
          num_dimensions);
      return kTfLiteError;
    }
  }

  // Calculate OpData.
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  switch (output_type) {  // Already know in/outtypes are same.
    case kTfLiteFloat32:
    case kTfLiteInt32:
    case kTfLiteInt64: {
      data->params.axis = CalculatePositiveAxis(params->axis, output);
      data->params.inputs_count = node->inputs->size;
      break;
    }
    case kTfLiteUInt8:
    case kTfLiteInt8: {
      data->params.axis = CalculatePositiveAxis(params->axis, output);
      data->params.inputs_count = node->inputs->size;

      float* input_scales =
          reinterpret_cast<float*>(context->AllocatePersistentBuffer(
              context, node->inputs->size * sizeof(float)));

      int32_t* input_zero_points =
          reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
              context, node->inputs->size * sizeof(int32_t)));

      // Allocate persistent scale and zeropoint buffers.
      // Store input scale and zero point values in OpParams:
      for (int i = 0; i < node->inputs->size; ++i) {
        const TfLiteTensor* t = GetInput(context, node, i);
        TF_LITE_ENSURE(context, t != nullptr);
        input_scales[i] = t->params.scale;
        input_zero_points[i] = t->params.zero_point;
      }

      data->params.input_scale = input_scales;
      data->params.input_zeropoint = input_zero_points;
      data->params.output_zeropoint = output->params.zero_point;
      data->params.output_scale = output->params.scale;
      break;
    }
    default:
      TF_LITE_KERNEL_LOG(
          context, "Op Concatenation does not currently support Type '%s'.",
          TfLiteTypeGetName(output_type));
      return kTfLiteError;
  }

  return kTfLiteOk;
}

// Handles negative axis index, coerces to positive index value.
inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) {
  if (axis >= 0) {
    return axis;
  } else {
    return NumDimensions(output_tensor) + axis;
  }
}

// The following functions are helpers to get tensor data in the format that the
// reference op implementation expects. They provide the same functionality as
// class VectorOfTensors and class VectorOfQuantizedTensors in TFLite.

// Gets shapes from a list of tensors.
inline void GetAllTensorShapes(const TfLiteContext& context,
                               const TfLiteIntArray& tensor_list,
                               RuntimeShape all_shapes[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    RuntimeShape shape = GetTensorShape(t);
    all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData());
  }
}

// Get shape pointers from a list of shapes.
inline void GetShapesPointers(const RuntimeShape* shapes, size_t num,
                              const RuntimeShape* pointers[]) {
  for (size_t i = 0; i < num; ++i) {
    pointers[i] = &shapes[i];
  }
}

// Gets data pointers from a list of tensors.
template <typename T>
inline void GetAllTensorData(const TfLiteContext& context,
                             const TfLiteIntArray& tensor_list,
                             T* all_data[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    all_data[i] = GetTensorData<T>(t);
  }
}

// Gets scale and zero point from a list of tensors
inline void GetAllQuantizationParam(const TfLiteContext& context,
                                    const TfLiteIntArray& tensor_list,
                                    float scales[kMaxInputNum],
                                    int32 zero_points[kMaxInputNum]) {
  for (int i = 0; i < tensor_list.size; ++i) {
    const TfLiteTensor* t = &context.tensors[tensor_list.data[i]];
    scales[i] = t->params.scale;
    zero_points[i] = t->params.zero_point;
  }
}

template <typename data_type>
void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointer of input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const data_type* inputs_data[kMaxInputNum];
  GetAllTensorShapes(*context, *node->inputs, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllTensorData(*context, *node->inputs, inputs_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  ConcatenationParams op_params;
  op_params.axis = CalculatePositiveAxis(params->axis, output);
  op_params.inputs_count = NumInputs(node);

  reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data,
                               GetTensorShape(output),
                               GetTensorData<data_type>(output));
}

void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
  // Collect the shapes and data pointer of input tensors
  RuntimeShape inputs_shape[kMaxInputNum];
  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
  const uint8_t* inputs_data[kMaxInputNum];
  float inputs_scale[kMaxInputNum];
  int32 inputs_zero_point[kMaxInputNum];
  GetAllTensorShapes(*context, *node->inputs, inputs_shape);
  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
  GetAllTensorData(*context, *node->inputs, inputs_data);
  GetAllQuantizationParam(*context, *node->inputs, inputs_scale,
                          inputs_zero_point);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const TfLiteConcatenationParams* params =
      reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);

  ConcatenationParams op_params;
  op_params.axis = CalculatePositiveAxis(params->axis, output);
  op_params.inputs_count = NumInputs(node);
  op_params.input_zeropoint = inputs_zero_point;
  op_params.input_scale = inputs_scale;
  op_params.output_zeropoint = output->params.zero_point;
  op_params.output_scale = output->params.scale;

  reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr,
                                          inputs_data, GetTensorShape(output),
                                          GetTensorData<uint8>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
  const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output_tensor != nullptr);
  TfLiteType output_type = output_tensor->type;

  switch (output_type) {  // Already know in/outtypes are same.
    case kTfLiteFloat32:
@@ -214,16 +260,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace concatenation

TfLiteRegistration* Register_CONCATENATION() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/concatenation::Prepare,
                                 /*invoke=*/concatenation::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_CONCATENATION() {
  return {/*init=*/concatenation::Init,
          /*free=*/nullptr,
          /*prepare=*/concatenation::Prepare,
          /*invoke=*/concatenation::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro

@@ -1,279 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/conv.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace micro {
namespace conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted by jomjol 05.06.20
//constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 4096;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
constexpr int kConvQuantizedDimension = 0;

// This file has 2 implementation of Conv.

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};

inline PaddingType RuntimePaddingType(TfLitePadding padding) {
  switch (padding) {
    case TfLitePadding::kTfLitePaddingSame:
      return PaddingType::kSame;
    case TfLitePadding::kTfLitePaddingValid:
      return PaddingType::kValid;
    case TfLitePadding::kTfLitePaddingUnknown:
    default:
      return PaddingType::kNone;
  }
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, int width, int height,
                             int filter_width, int filter_height, int out_width,
                             int out_height, const TfLiteType data_type,
                             OpData* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift),
        output_channels));
  }
  return kTfLiteOk;
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* im2col,
                   TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<uint8_t>(input), GetTensorShape(filter),
                      GetTensorData<uint8_t>(filter), GetTensorShape(bias),
                      GetTensorData<int32_t>(bias), GetTensorShape(output),
                      GetTensorData<uint8_t>(output), GetTensorShape(im2col),
                      GetTensorData<uint8_t>(im2col), nullptr);
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output,
                             TfLiteTensor* im2col) {
  ConvParams op_params;
  op_params.input_offset = -input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  reference_integer_ops::ConvPerChannel(
      op_params, data->per_channel_output_multiplier,
      data->per_channel_output_shift, GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* im2col,
               TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<float>(input), GetTensorShape(filter),
                      GetTensorData<float>(filter), GetTensorShape(bias),
                      GetTensorData<float>(bias), GetTensorShape(output),
                      GetTensorData<float>(output), GetTensorShape(im2col),
                      GetTensorData<float>(im2col));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);

  int input_width = input->dims->data[2];
  int input_height = input->dims->data[1];
  int filter_width = filter->dims->data[2];
  int filter_height = filter->dims->data[1];
  int output_width = output->dims->data[2];
  int output_height = output->dims->data[1];

  OpData data;

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);

    TF_LITE_ENSURE(context,
                   affine_quantization->scale->size == 1 ||
                       affine_quantization->scale->size ==
                           filter->dims->data[kConvQuantizedDimension]);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, &data));

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input, filter, bias, nullptr,
                nullptr, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
                              output, nullptr);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, &data, input, filter, bias, nullptr,
                    nullptr, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace conv

TfLiteRegistration* Register_CONV_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/conv::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
@@ -23,19 +23,15 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace conv {
namespace {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// Adjusted by jomjol 05.06.20
//constexpr int kMaxChannels = 1024;
constexpr int kMaxChannels = 32384;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
@@ -45,15 +41,20 @@ constexpr int kConvQuantizedDimension = 0;

struct OpData {
  TfLitePaddingValues padding;

  // Cached tensor zero point values for quantized operations.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;

  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];
  int32_t* per_channel_output_multiplier;
  int32_t* per_channel_output_shift;

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
@@ -74,10 +75,10 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) {
}

TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, int width, int height,
                             int filter_width, int filter_height, int out_width,
                             int out_height, const TfLiteType data_type,
                             OpData* data) {
                             const TfLiteConvParams* params, int width,
                             int height, int filter_width, int filter_height,
                             int out_width, int out_height,
                             const TfLiteType data_type, OpData* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
@@ -94,10 +95,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    TF_LITE_ENSURE(context, input != nullptr);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    TF_LITE_ENSURE(context, filter != nullptr);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    TF_LITE_ENSURE(context, output != nullptr);
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
@@ -111,100 +115,24 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
  return kTfLiteOk;
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* im2col,
                   TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<uint8_t>(input), GetTensorShape(filter),
                      GetTensorData<uint8_t>(filter), GetTensorShape(bias),
                      GetTensorData<int32_t>(bias), GetTensorShape(output),
                      GetTensorData<uint8_t>(output), GetTensorShape(im2col),
                      GetTensorData<uint8_t>(im2col), nullptr);
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output,
                             TfLiteTensor* im2col) {
  ConvParams op_params;
  op_params.input_offset = -input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  reference_integer_ops::ConvPerChannel(
      op_params, data->per_channel_output_multiplier,
      data->per_channel_output_shift, GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* im2col,
               TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  reference_ops::Conv(op_params, GetTensorShape(input),
                      GetTensorData<float>(input), GetTensorShape(filter),
                      GetTensorData<float>(filter), GetTensorShape(bias),
                      GetTensorData<float>(bias), GetTensorShape(output),
                      GetTensorData<float>(output), GetTensorShape(im2col),
                      GetTensorData<float>(im2col));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
  OpData* data = static_cast<OpData*>(node->user_data);
  const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
  TF_LITE_ENSURE(context, filter != nullptr);

  int input_width = input->dims->data[2];
  int input_height = input->dims->data[1];
@@ -212,9 +140,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  int filter_height = filter->dims->data[1];
  int output_width = output->dims->data[2];
  int output_height = output->dims->data[1];


  struct tflite::ops::micro::conv::OpData *data = (struct tflite::ops::micro::conv::OpData*) malloc(sizeof(struct tflite::ops::micro::conv::OpData));
  // Dynamically allocate per-channel quantization parameters.
  const int num_channels = filter->dims->data[kConvQuantizedDimension];
  data->per_channel_output_multiplier =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
@@ -222,8 +156,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
        static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, affine_quantization->zero_point);
@@ -240,6 +173,136 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, data));

  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;

  return kTfLiteOk;
}  // namespace conv

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteConvParams* params, const OpData& data,
                   const TfLiteEvalTensor* input,
                   const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
                   TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
                   TfLiteEvalTensor* output) {
  const int32_t input_offset = -data.input_zero_point;
  const int32_t filter_offset = -data.filter_zero_point;
  const int32_t output_offset = data.output_zero_point;

  // TODO(b/154032858): Investigate removing extra copies.
  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data.output_multiplier;
  op_params.output_shift = -data.output_shift;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;
  reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
                      tflite::micro::GetTensorData<uint8_t>(input),
                      tflite::micro::GetTensorShape(filter),
                      tflite::micro::GetTensorData<uint8_t>(filter),
                      tflite::micro::GetTensorShape(bias),
                      tflite::micro::GetTensorData<int32_t>(bias),
                      tflite::micro::GetTensorShape(output),
                      tflite::micro::GetTensorData<uint8_t>(output),
                      tflite::micro::GetTensorShape(im2col),
                      tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteConvParams* params, const OpData& data,
                             const TfLiteEvalTensor* input,
                             const TfLiteEvalTensor* filter,
                             const TfLiteEvalTensor* bias,
                             TfLiteEvalTensor* output,
                             TfLiteEvalTensor* im2col) {
  // TODO(b/154032858): Investigate removing extra copies.
  ConvParams op_params;
  op_params.input_offset = -data.input_zero_point;
  op_params.output_offset = data.output_zero_point;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.padding_values.height = data.padding.height;
  op_params.padding_values.width = data.padding.width;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;

  reference_integer_ops::ConvPerChannel(
      op_params, data.per_channel_output_multiplier,
      data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<int8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<int8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<int8_t>(output));
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteConvParams* params, const OpData& data,
               const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
               const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
               TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  // TODO(b/154032858): Investigate removing extra copies.
  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
                      tflite::micro::GetTensorData<float>(input),
                      tflite::micro::GetTensorShape(filter),
                      tflite::micro::GetTensorData<float>(filter),
                      tflite::micro::GetTensorShape(bias),
                      tflite::micro::GetTensorData<float>(bias),
                      tflite::micro::GetTensorShape(output),
                      tflite::micro::GetTensorData<float>(output),
                      tflite::micro::GetTensorShape(im2col),
                      tflite::micro::GetTensorData<float>(im2col));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kFilterTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 3)
          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
          : nullptr;
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
                     "Hybrid models are not supported on TFLite Micro.");

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, data, input, filter, bias, nullptr,
@@ -256,27 +319,22 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      free(data);
      return kTfLiteError;
  }
  free(data);
  return kTfLiteOk;
}

}  // namespace conv
}  // namespace

TfLiteRegistration* Register_CONV_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/conv::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_CONV_2D() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
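// Note (not part of the upstream diff; names below are illustrative only): the
// rewritten conv kernel moves all one-time work into the Init/Prepare pair. A
// minimal sketch of that lifecycle, as a hedged reading of the change above:
//
//   void* Init(TfLiteContext* context, const char* buffer, size_t length) {
//     // One OpData per node, placed in the persistent arena.
//     return context->AllocatePersistentBuffer(context, sizeof(OpData));
//   }
//
//   TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
//     OpData* data = static_cast<OpData*>(node->user_data);
//     // Per-channel buffers are sized from the filter tensor here, once,
//     // instead of living in fixed int32_t[kMaxChannels] arrays.
//     return kTfLiteOk;
//   }
//
// Eval then only reads the cached OpData, so no allocation (and no free()) is
// needed on the hot path. Registration also changes shape: Register_CONV_2D()
// now returns a TfLiteRegistration by value instead of a pointer to a static,
// so callers copy the struct rather than holding a pointer into the kernel.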
@@ -24,18 +24,15 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace depthwise_conv {
namespace {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
constexpr int kMaxChannels = 1024;

// Depthwise conv is quantized along dimension 3:
// https://www.tensorflow.org/lite/performance/quantization_spec
@@ -43,16 +40,20 @@ constexpr int kDepthwiseConvQuantizedDimension = 3;

struct OpData {
  TfLitePaddingValues padding;

  // Cached tensor zero point values for quantized operations.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;

  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  // TODO(b/141139247): Allocate these dynamically when possible.
  int32_t per_channel_output_multiplier[kMaxChannels];
  int32_t per_channel_output_shift[kMaxChannels];

  int32_t* per_channel_output_multiplier;
  int32_t* per_channel_output_shift;
  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
@@ -78,125 +79,44 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
    TF_LITE_ENSURE(context, input != nullptr);
    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
    TF_LITE_ENSURE(context, filter != nullptr);
    const TfLiteTensor* bias =
        GetOptionalInputTensor(context, node, kBiasTensor);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    TF_LITE_ENSURE(context, output != nullptr);
    int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
    return tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier,
        reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
        reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
  }
  return kTfLiteOk;
}

}  // namespace

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteDepthwiseConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  tflite::reference_ops::DepthwiseConv(
      op_params, GetTensorShape(input), GetTensorData<float>(input),
      GetTensorShape(filter), GetTensorData<float>(filter),
      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
      GetTensorData<float>(output));
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.input_offset = -input->params.zero_point;
  op_params.weights_offset = 0;
  op_params.output_offset = output->params.zero_point;
  // TODO(b/130439627): Use calculated value for clamping.
  op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
  op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data->per_channel_output_multiplier,
      data->per_channel_output_shift, GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteDepthwiseConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* output) {
  const int32_t input_offset = -input->params.zero_point;
  const int32_t filter_offset = -filter->params.zero_point;
  const int32_t output_offset = output->params.zero_point;

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
  op_params.output_shift = -data->output_shift;

  tflite::reference_ops::DepthwiseConv(
      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
      GetTensorShape(filter), GetTensorData<uint8_t>(filter),
      GetTensorShape(bias), GetTensorData<int32_t>(bias),
      GetTensorShape(output), GetTensorData<uint8_t>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = static_cast<OpData*>(node->user_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
  TF_LITE_ENSURE(context, filter != nullptr);

  const TfLiteType data_type = input->type;
  int width = SizeOfDimension(input, 2);
@@ -204,7 +124,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);

  OpData data;
  // Per channel quantization is only needed for int8_t inference. For other
  // quantized types, only a single scale and zero point is needed.
  const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
  // Dynamically allocate per-channel quantization parameters.
  data->per_channel_output_multiplier =
      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
@@ -227,20 +156,151 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
                                        filter_width, filter_height, data_type,
                                        &data));
                                        data));

  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;

  return kTfLiteOk;
}

void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteDepthwiseConvParams* params, const OpData& data,
               const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
               const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

  tflite::reference_ops::DepthwiseConv(
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<float>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<float>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<float>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<float>(output));
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params,
                             const OpData& data, const TfLiteEvalTensor* input,
                             const TfLiteEvalTensor* filter,
                             const TfLiteEvalTensor* bias,
                             TfLiteEvalTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.input_offset = -data.input_zero_point;
  op_params.weights_offset = 0;
  op_params.output_offset = data.output_zero_point;
  // TODO(b/130439627): Use calculated value for clamping.
  op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
  op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data.per_channel_output_multiplier,
      data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<int8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<int8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<int8_t>(output));
}

void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteDepthwiseConvParams* params, const OpData& data,
                   const TfLiteEvalTensor* input,
                   const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
                   TfLiteEvalTensor* output) {
  const int32_t input_offset = -data.input_zero_point;
  const int32_t filter_offset = -data.filter_zero_point;
  const int32_t output_offset = data.output_zero_point;

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data.output_multiplier;
  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
  op_params.output_shift = -data.output_shift;

  tflite::reference_ops::DepthwiseConv(
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<uint8_t>(input),
      tflite::micro::GetTensorShape(filter),
      tflite::micro::GetTensorData<uint8_t>(filter),
      tflite::micro::GetTensorShape(bias),
      tflite::micro::GetTensorData<int32_t>(bias),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<uint8_t>(output));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kFilterTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 3)
          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
          : nullptr;

  // TODO(aselle): Consider whether float conv and quantized conv should be
  // separate ops to avoid dispatch overhead here.
  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat(context, node, params, &data, input, filter, bias, output);
      EvalFloat(context, node, params, data, input, filter, bias, output);
      break;
    case kTfLiteInt8:
      EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                              output);
      break;
    case kTfLiteUInt8:
      EvalQuantized(context, node, params, &data, input, filter, bias, output);
      EvalQuantized(context, node, params, data, input, filter, bias, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@@ -250,20 +310,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

}  // namespace depthwise_conv
}  // namespace

TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/depthwise_conv::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
  return {/*init=*/Init,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
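// Note (not part of the upstream diff): a sketch of how the per-channel
// quantization buffers are sized for depthwise conv. The helper name below is
// hypothetical; the diff inlines this arithmetic directly in Prepare.
//
//   // Depthwise conv is quantized along dimension 3 of the filter tensor.
//   inline size_t PerChannelBufferBytes(const TfLiteTensor* filter) {
//     const int num_channels =
//         filter->dims->data[kDepthwiseConvQuantizedDimension];
//     return static_cast<size_t>(num_channels) * sizeof(int32_t);
//   }
//
// Two persistent buffers of this size (multiplier and shift) replace the
// former int32_t[kMaxChannels] members of OpData, so memory scales with the
// actual channel count instead of a fixed upper bound.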
@@ -22,19 +22,39 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace dequantize {

struct OpData {
  tflite::DequantizationParams quantization_params;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  int32_t output_zero_point;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  // TODO(b/140515557): Add cached dequant to improve hybrid model performance.
  const TfLiteTensor* input = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
                              input->type == kTfLiteInt8 ||
@@ -42,32 +62,49 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(
      context, output->type == kTfLiteFloat32 || output->type == kTfLiteInt32);

  if (output->type == kTfLiteInt32) {
    const double effective_output_scale =
        static_cast<double>(input->params.scale) /
        static_cast<double>(output->params.scale);
    QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
                       &data->output_shift);
  }

  data->quantization_params.zero_point = input->params.zero_point;
  data->quantization_params.scale = static_cast<double>(input->params.scale);
  data->output_zero_point = output->params.zero_point;
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, 0);
  TfLiteTensor* output = GetOutput(context, node, 0);
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

  if (output->type == kTfLiteFloat32) {
    tflite::DequantizationParams op_params;
    op_params.zero_point = input->params.zero_point;
    op_params.scale = static_cast<double>(input->params.scale);
    switch (input->type) {
      case kTfLiteUInt8:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<uint8_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      case kTfLiteInt8:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<int8_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      case kTfLiteInt16:
        reference_ops::Dequantize(
            op_params, GetTensorShape(input), GetTensorData<int16_t>(input),
            GetTensorShape(output), GetTensorData<float>(output));
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<int16_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      default:
        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
@@ -76,28 +113,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
        return kTfLiteError;
    }
  } else if (output->type == kTfLiteInt32) {
    int32_t output_multiplier;
    int output_shift;
    const double effective_output_scale =
        static_cast<double>(input->params.scale) /
        static_cast<double>(output->params.scale);
    QuantizeMultiplier(effective_output_scale, &output_multiplier,
                       &output_shift);
    int flat_size =
        MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
    int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
                                     tflite::micro::GetTensorShape(output));
    switch (input->type) {
      case kTfLiteInt16: {
        reference_ops::Requantize(
            GetTensorData<int16_t>(input), flat_size, output_multiplier,
            output_shift, input->params.zero_point, output->params.zero_point,
            GetTensorData<int32_t>(output));
            tflite::micro::GetTensorData<int16_t>(input), flat_size,
            data->output_multiplier, data->output_shift,
            data->quantization_params.zero_point, data->output_zero_point,
            tflite::micro::GetTensorData<int32_t>(output));
        break;
      }
      case kTfLiteInt8: {
        reference_ops::Requantize(
            GetTensorData<int8_t>(input), flat_size, output_multiplier,
            output_shift, input->params.zero_point, output->params.zero_point,
            GetTensorData<int32_t>(output));
            tflite::micro::GetTensorData<int8_t>(input), flat_size,
            data->output_multiplier, data->output_shift,
            data->quantization_params.zero_point, data->output_zero_point,
            tflite::micro::GetTensorData<int32_t>(output));
        break;
      }
      default:
@@ -118,16 +150,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {

}  // namespace dequantize

TfLiteRegistration* Register_DEQUANTIZE() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/dequantize::Prepare,
                                 /*invoke=*/dequantize::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_DEQUANTIZE() {
  return {/*init=*/dequantize::Init,
          /*free=*/nullptr,
          /*prepare=*/dequantize::Prepare,
          /*invoke=*/dequantize::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
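// Note (not part of the upstream diff): dequantize::Prepare now folds the
// input/output scale ratio into a fixed-point multiplier once, instead of
// recomputing it on every invocation. A sketch with illustrative values:
//
//   const double effective_output_scale =
//       static_cast<double>(input->params.scale) /    // e.g. 0.5
//       static_cast<double>(output->params.scale);    // e.g. 0.25
//   int32_t output_multiplier;
//   int output_shift;
//   QuantizeMultiplier(effective_output_scale, &output_multiplier,
//                      &output_shift);
//
// Eval then passes the cached multiplier/shift (and cached zero points) to
// reference_ops::Requantize, keeping floating-point math off the hot path.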
@@ -18,6 +18,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -39,8 +41,10 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
if (!IsSupportedType(input->type)) {
|
||||
TF_LITE_KERNEL_LOG(context, "Input data type %s (%d) is not supported.",
|
||||
TfLiteTypeGetName(input->type), input->type);
|
||||
@@ -52,13 +56,13 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
template <typename T>
|
||||
inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
T func(T), TfLiteType expected_type) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, expected_type);
|
||||
const int64_t num_elements = NumElements(input);
|
||||
const T* in_data = GetTensorData<T>(input);
|
||||
T* out_data = GetTensorData<T>(output);
|
||||
for (int64_t i = 0; i < num_elements; ++i) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type);
|
||||
const size_t num_elements = ElementCount(*input->dims);
|
||||
const T* in_data = tflite::micro::GetTensorData<T>(input);
|
||||
T* out_data = tflite::micro::GetTensorData<T>(output);
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
out_data[i] = func(in_data[i]);
|
||||
}
|
||||
return kTfLiteOk;
|
||||
@@ -109,116 +113,100 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace elementwise
|
||||
|
||||
TfLiteRegistration* Register_ABS() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::AbsEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ABS() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::AbsEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_SIN() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SIN() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::SinEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_COS() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::CosEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_COS() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
|
||||
/*invoke=*/elementwise::CosEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOG() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/
|
||||
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::LogEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_LOG() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::LogEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_SQRT() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::SqrtEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_SQRT() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::SqrtEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_RSQRT() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::RsqrtEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_RSQRT() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::RsqrtEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_SQUARE() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
      /*invoke=*/elementwise::SquareEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_SQUARE() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
          /*invoke=*/elementwise::SquareEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

TfLiteRegistration* Register_LOGICAL_NOT() {
  static TfLiteRegistration r = {
      /*init=*/nullptr,
      /*free=*/nullptr,
      /*prepare=*/
      elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
      /*invoke=*/elementwise::LogicalNotEval,
      /*profiling_string=*/nullptr,
      /*builtin_code=*/0,
      /*custom_name=*/nullptr,
      /*version=*/0};
  return &r;
TfLiteRegistration Register_LOGICAL_NOT() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/
          elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
          /*invoke=*/elementwise::LogicalNotEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
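Illustration, not part of the commit: the hunks above change each elementwise Register_* accessor from returning a pointer to a function-local static into returning the TfLiteRegistration by value. A minimal hypothetical caller of the new signature (the wrapper function and its name are assumptions for the example):

#include "tensorflow/lite/c/common.h"

// Declarations as introduced by this change (normally pulled in via
// tensorflow/lite/micro/kernels/micro_ops.h).
namespace tflite {
namespace ops {
namespace micro {
TfLiteRegistration Register_LOG();
}  // namespace micro
}  // namespace ops
}  // namespace tflite

// With the by-value signature the caller owns a copy of the registration
// instead of holding a pointer into the kernel's translation unit.
TfLiteStatus InvokeLogKernel(TfLiteContext* context, TfLiteNode* node) {
  TfLiteRegistration log_kernel = tflite::ops::micro::Register_LOG();
  return log_kernel.invoke(context, node);
}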
@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,16 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Abstract string. We don't want even absl at this level.
#ifndef TENSORFLOW_LITE_STRING_TYPE_H_
#define TENSORFLOW_LITE_STRING_TYPE_H_

#include <string>
//
// This is a stub file for non-Ethos platforms
//
#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace ops {
namespace micro {
namespace custom {
TfLiteRegistration* Register_ETHOSU() { return nullptr; }

using std::string;
const char* GetString_ETHOSU() { return ""; }

}  // namespace custom
}  // namespace micro
}  // namespace ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_STRING_TYPE_H_
@@ -17,7 +17,7 @@ limitations under the License.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
@@ -28,25 +28,28 @@ constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
                       GetTensorShape(output), GetTensorData<float>(output));
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  reference_ops::Floor(tflite::micro::GetTensorShape(input),
                       tflite::micro::GetTensorData<float>(input),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));
  return kTfLiteOk;
}
}  // namespace floor

TfLiteRegistration* Register_FLOOR() {
  static TfLiteRegistration r = {/*init=*/nullptr,
                                 /*free=*/nullptr,
                                 /*prepare=*/nullptr,
                                 /*invoke=*/floor::Eval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
TfLiteRegistration Register_FLOOR() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/floor::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro

@@ -1,4 +1,4 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -13,20 +13,19 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
|
||||
#include "tensorflow/lite/micro/kernels/fully_connected.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace fully_connected {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
@@ -40,6 +39,10 @@ struct OpData {
|
||||
int32_t output_activation_max;
|
||||
// The index of the temporary tensor where the quantized inputs are cached.
|
||||
int input_quantized_index;
|
||||
// Cached zero point values of tensors.
|
||||
int32_t input_zero_point;
|
||||
int32_t filter_zero_point;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
constexpr int kInputTensor = 0;
|
||||
@@ -64,20 +67,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
data->filter_zero_point = filter->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
void* data = nullptr;
|
||||
if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
|
||||
kTfLiteError) {
|
||||
return nullptr;
|
||||
}
|
||||
return data;
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
@@ -89,11 +89,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
TF_LITE_ENSURE(context, filter != nullptr);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
|
||||
"Hybrid models are not supported on TFLite Micro.");
|
||||
|
||||
@@ -102,13 +105,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const OpData& data,
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.weights_offset = -filter->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.input_offset = -data.input_zero_point;
|
||||
op_params.weights_offset = -data.filter_zero_point;
|
||||
op_params.output_offset = data.output_zero_point;
|
||||
op_params.output_multiplier = data.output_multiplier;
|
||||
// TODO(b/138810107): Figure out whether output shift should be inverted
|
||||
op_params.output_shift = -data.output_shift;
|
||||
@@ -116,20 +121,25 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
reference_integer_ops::FullyConnected(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(filter), GetTensorData<int8_t>(filter),
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<int8_t>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<int32_t>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpData& data, const TfLiteTensor* input,
|
||||
const TfLiteTensor* filter, const TfLiteTensor* bias,
|
||||
TfLiteTensor* output) {
|
||||
const int32_t input_offset = -input->params.zero_point;
|
||||
const int32_t filter_offset = -filter->params.zero_point;
|
||||
const int32_t output_offset = output->params.zero_point;
|
||||
const OpData& data, const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias,
|
||||
TfLiteEvalTensor* output) {
|
||||
const int32_t input_offset = -data.input_zero_point;
|
||||
const int32_t filter_offset = -data.filter_zero_point;
|
||||
const int32_t output_offset = data.output_zero_point;
|
||||
|
||||
tflite::FullyConnectedParams op_params;
|
||||
op_params.input_offset = input_offset;
|
||||
@@ -141,12 +151,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.quantized_activation_min = data.output_activation_min;
|
||||
op_params.quantized_activation_max = data.output_activation_max;
|
||||
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
|
||||
GetTensorShape(filter), GetTensorData<uint8_t>(filter), \
|
||||
GetTensorShape(bias), GetTensorData<int32_t>(bias), \
|
||||
GetTensorShape(output), GetTensorData<output_data_type>(output))
|
||||
#define TF_LITE_FULLY_CONNECTED(output_data_type) \
|
||||
reference_ops::FullyConnected( \
|
||||
op_params, tflite::micro::GetTensorShape(input), \
|
||||
tflite::micro::GetTensorData<uint8_t>(input), \
|
||||
tflite::micro::GetTensorShape(filter), \
|
||||
tflite::micro::GetTensorData<uint8_t>(filter), \
|
||||
tflite::micro::GetTensorShape(bias), \
|
||||
tflite::micro::GetTensorData<int32_t>(bias), \
|
||||
tflite::micro::GetTensorShape(output), \
|
||||
tflite::micro::GetTensorData<output_data_type>(output))
|
||||
switch (output->type) {
|
||||
case kTfLiteUInt8:
|
||||
TF_LITE_FULLY_CONNECTED(uint8_t);
|
||||
@@ -165,8 +179,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
|
||||
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteFusedActivation activation,
|
||||
const TfLiteTensor* input, const TfLiteTensor* filter,
|
||||
const TfLiteTensor* bias, TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input,
|
||||
const TfLiteEvalTensor* filter,
|
||||
const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
@@ -174,10 +189,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.float_activation_min = output_activation_min;
|
||||
op_params.float_activation_max = output_activation_max;
|
||||
tflite::reference_ops::FullyConnected(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(filter), GetTensorData<float>(filter),
|
||||
GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(filter),
|
||||
tflite::micro::GetTensorData<float>(filter),
|
||||
tflite::micro::GetTensorShape(bias),
|
||||
tflite::micro::GetTensorData<float>(bias),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@@ -186,10 +205,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const auto* params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
|
||||
const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* filter =
|
||||
tflite::micro::GetEvalInput(context, node, kWeightsTensor);
|
||||
const TfLiteEvalTensor* bias =
|
||||
tflite::micro::GetEvalInput(context, node, kBiasTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData& data = *(static_cast<const OpData*>(node->user_data));
|
||||
@@ -214,20 +237,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace fully_connected
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration* Register_FULLY_CONNECTED() {
|
||||
static TfLiteRegistration r = {/*init=*/fully_connected::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/fully_connected::Prepare,
|
||||
/*invoke=*/fully_connected::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_FULLY_CONNECTED() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -0,0 +1,50 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_

#include "tensorflow/lite/c/common.h"

namespace tflite {

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_FULLY_CONNECTED();

#if defined(CMSIS_NN) || defined(ARDUINO)
// The Arduino is a special case where we use the CMSIS kernels, but because of
// the current approach to building for Arduino, we do not support -DCMSIS_NN as
// part of the build. As a result, we use defined(ARDUINO) as proxy for the
// CMSIS kernels for this one special case.

// Returns a TfLiteRegistration struct for the cmsis-nn kernel variant that only
// supports int8.
TfLiteRegistration Register_FULLY_CONNECTED_INT8();

#else
// Note that while this block gets used for both reference and optimized kernels
// that do not have any specialized implementations, the only goal here is to
// define a fallback implementation that allows reference kernels to still be
// used from applications that call a more specific kernel variant.

inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() {
  return Register_FULLY_CONNECTED();
}

#endif
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
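Illustration, not part of the commit: a hedged sketch of how application code might choose between the two registrations declared above. The selector function and its flag are hypothetical; whether the int8-only variant actually maps to CMSIS-NN depends on the CMSIS_NN / ARDUINO build settings described in the header.

#include "tensorflow/lite/micro/kernels/fully_connected.h"

// For int8-quantized models the specialized registration may be smaller and
// faster; otherwise fall back to the generic one, which is always defined.
TfLiteRegistration SelectFullyConnectedKernel(bool int8_only_model) {
  return int8_only_model ? tflite::Register_FULLY_CONNECTED_INT8()
                         : tflite::Register_FULLY_CONNECTED();
}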
142 code/lib/tfmicro/tensorflow/lite/micro/kernels/hard_swish.cc Normal file
@@ -0,0 +1,142 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace hard_swish {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}

TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
    HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);

    params->input_zero_point = input->params.zero_point;
    params->output_zero_point = output->params.zero_point;

    const float input_scale = input->params.scale;
    const float hires_input_scale = (1.0f / 128.0f) * input_scale;
    const float reluish_scale = 3.0f / 32768.0f;
    const float output_scale = output->params.scale;

    const double output_multiplier =
        static_cast<double>(hires_input_scale / output_scale);
    int32_t output_multiplier_fixedpoint_int32;
    QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
                       &params->output_multiplier_exponent);
    DownScaleInt32ToInt16Multiplier(
        output_multiplier_fixedpoint_int32,
        &params->output_multiplier_fixedpoint_int16);

    TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);

    const double reluish_multiplier =
        static_cast<double>(hires_input_scale / reluish_scale);
    int32_t reluish_multiplier_fixedpoint_int32;
    QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
                       &params->reluish_multiplier_exponent);
    DownScaleInt32ToInt16Multiplier(
        reluish_multiplier_fixedpoint_int32,
        &params->reluish_multiplier_fixedpoint_int16);
  }

  return kTfLiteOk;
}

TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);

  switch (input->type) {
    case kTfLiteFloat32: {
      tflite::reference_ops::HardSwish<float>(
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output));
    } break;
    case kTfLiteUInt8: {
      tflite::reference_ops::HardSwish<uint8_t>(
          *params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<uint8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<uint8_t>(output));
    } break;
    case kTfLiteInt8: {
      tflite::reference_ops::HardSwish<int8_t>(
          *params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
    } break;
    default: {
      TF_LITE_KERNEL_LOG(
          context,
          "Only float32/int8_t/uint8_t are supported currently, got %s",
          TfLiteTypeGetName(input->type));
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

}  // namespace hard_swish

TfLiteRegistration Register_HARD_SWISH() {
  return {/*init=*/hard_swish::HardSwishInit,
          /*free=*/nullptr,
          /*prepare=*/hard_swish::HardSwishPrepare,
          /*invoke=*/hard_swish::HardSwishEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
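For reference (not part of the commit): the float operation this kernel quantizes is hard_swish(x) = x * relu6(x + 3) / 6; the multipliers computed in HardSwishPrepare approximate the scaling in fixed point. A scalar sketch of the float definition, with an assumed helper name:

#include <algorithm>

// Float reference of the hard-swish nonlinearity that the quantized paths
// above approximate.
inline float HardSwishReference(float x) {
  const float relu6_of_shifted = std::min(std::max(x + 3.0f, 0.0f), 6.0f);
  return x * relu6_of_shifted / 6.0f;
}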
165 code/lib/tfmicro/tensorflow/lite/micro/kernels/kernel_runner.cc Normal file
@@ -0,0 +1,165 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace micro {
|
||||
|
||||
namespace {
|
||||
constexpr size_t kBufferAlignment = 16;
|
||||
} // namespace
|
||||
|
||||
// TODO(b/161841696): Consider moving away from global arena buffers:
|
||||
constexpr int KernelRunner::kNumScratchBuffers_;
|
||||
constexpr int KernelRunner::kKernelRunnerBufferSize_;
|
||||
uint8_t KernelRunner::kKernelRunnerBuffer_[];
|
||||
|
||||
KernelRunner::KernelRunner(const TfLiteRegistration& registration,
|
||||
TfLiteTensor* tensors, int tensors_size,
|
||||
TfLiteIntArray* inputs, TfLiteIntArray* outputs,
|
||||
void* builtin_data, ErrorReporter* error_reporter)
|
||||
: allocator_(SimpleMemoryAllocator::Create(
|
||||
error_reporter, kKernelRunnerBuffer_, kKernelRunnerBufferSize_)),
|
||||
registration_(registration),
|
||||
tensors_(tensors),
|
||||
error_reporter_(error_reporter) {
|
||||
// Prepare TfLiteContext:
|
||||
context_.impl_ = static_cast<void*>(this);
|
||||
context_.ReportError = ReportOpError;
|
||||
context_.recommended_num_threads = 1;
|
||||
context_.GetTensor = GetTensor;
|
||||
context_.GetEvalTensor = GetEvalTensor;
|
||||
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
|
||||
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
|
||||
context_.GetScratchBuffer = GetScratchBuffer;
|
||||
|
||||
// Prepare TfLiteNode:
|
||||
node_.inputs = inputs;
|
||||
node_.outputs = outputs;
|
||||
node_.builtin_data = builtin_data;
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) {
|
||||
if (registration_.init) {
|
||||
node_.user_data = registration_.init(&context_, init_data, /*length=*/0);
|
||||
}
|
||||
if (registration_.prepare) {
|
||||
TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::Invoke() {
|
||||
if (registration_.invoke == nullptr) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"TfLiteRegistration missing invoke function pointer!");
|
||||
return kTfLiteError;
|
||||
}
|
||||
return registration_.invoke(&context_, &node_);
|
||||
}
|
||||
|
||||
TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
|
||||
int tensor_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
return &runner->tensors_[tensor_index];
|
||||
}
|
||||
|
||||
TfLiteEvalTensor* KernelRunner::GetEvalTensor(
|
||||
const struct TfLiteContext* context, int tensor_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
TfLiteEvalTensor* eval_tensor =
|
||||
reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
|
||||
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
|
||||
TFLITE_DCHECK(eval_tensor != nullptr);
|
||||
|
||||
// In unit tests, the TfLiteTensor pointer contains the source of truth for
|
||||
// buffers and values:
|
||||
eval_tensor->data = runner->tensors_[tensor_index].data;
|
||||
eval_tensor->dims = runner->tensors_[tensor_index].dims;
|
||||
eval_tensor->type = runner->tensors_[tensor_index].type;
|
||||
return eval_tensor;
|
||||
}
|
||||
|
||||
void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
|
||||
size_t bytes) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
return runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
|
||||
}
|
||||
|
||||
TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
|
||||
size_t bytes,
|
||||
int* buffer_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
TFLITE_DCHECK(buffer_index != nullptr);
|
||||
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
runner->error_reporter_,
|
||||
"Exceeded the maximum number of scratch tensors allowed (%d).",
|
||||
kNumScratchBuffers_);
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
// For tests, we allocate scratch buffers from the tail and keep them around
|
||||
// for the lifetime of model. This means that the arena size in the tests will
|
||||
// be more than what we would have if the scratch buffers could share memory.
|
||||
runner->scratch_buffers_[runner->scratch_buffer_count_] =
|
||||
runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
|
||||
TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
|
||||
nullptr);
|
||||
|
||||
*buffer_index = runner->scratch_buffer_count_++;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
|
||||
if (buffer_index >= runner->scratch_buffer_count_) {
|
||||
return nullptr;
|
||||
}
|
||||
return runner->scratch_buffers_[buffer_index];
|
||||
}
|
||||
|
||||
void KernelRunner::ReportOpError(struct TfLiteContext* context,
|
||||
const char* format, ...) {
|
||||
TFLITE_DCHECK(context != nullptr);
|
||||
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
|
||||
TFLITE_DCHECK(runner != nullptr);
|
||||
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
TF_LITE_REPORT_ERROR(runner->error_reporter_, format, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace tflite
|
||||
@@ -0,0 +1,83 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

namespace tflite {
namespace micro {

// Helper class to perform a simulated kernel (i.e. TfLiteRegistration)
// lifecycle (init, prepare, invoke). All internal allocations are handled by
// this class. Simply pass in the registration, list of required tensors,
// inputs array, outputs array, and any builtin data. Calling Invoke() will
// automatically walk the kernel, and outputs will be ready on the
// TfLiteTensor output provided during construction.
class KernelRunner {
 public:
  KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
               int tensors_size, TfLiteIntArray* inputs,
               TfLiteIntArray* outputs, void* builtin_data,
               ErrorReporter* error_reporter);

  // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
  // exceptions will be reported through the error_reporter and returned as a
  // status code here.
  TfLiteStatus InitAndPrepare(const char* init_data = nullptr);

  // Calls invoke on the given TfLiteRegistration. After a successful invoke,
  // results will be available in the output tensor as passed into the
  // constructor of this class.
  TfLiteStatus Invoke();

 protected:
  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
                                 int tensor_index);
  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
                                         int tensor_index);
  static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
                                                  size_t bytes,
                                                  int* buffer_index);
  static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
  static void ReportOpError(struct TfLiteContext* context, const char* format,
                            ...);

 private:
  static constexpr int kNumScratchBuffers_ = 5;

  static constexpr int kKernelRunnerBufferSize_ = 10000;
  static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];

  SimpleMemoryAllocator* allocator_ = nullptr;
  const TfLiteRegistration& registration_;
  TfLiteTensor* tensors_ = nullptr;
  ErrorReporter* error_reporter_ = nullptr;

  TfLiteContext context_ = {};
  TfLiteNode node_ = {};

  int scratch_buffer_count_ = 0;
  uint8_t* scratch_buffers_[kNumScratchBuffers_];
};

}  // namespace micro
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_
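Illustration, not part of the commit: a sketch of driving a kernel through init/prepare/invoke with the KernelRunner declared above. The helper name, the one-input/one-output tensor layout, and the size-prefixed int-array trick (mirroring the TfLiteIntArray layout) are assumptions made for this example.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

// Runs init/prepare/invoke for a single-input, single-output kernel.
// `tensors` is assumed to hold the input at index 0 and the output at
// index 1, already populated by the caller.
TfLiteStatus RunSingleOpOnce(const TfLiteRegistration& registration,
                             TfLiteTensor* tensors) {
  // TfLiteIntArray is a size-prefixed int array: {1, 0} == one entry, index 0.
  static int inputs_storage[] = {1, 0};
  static int outputs_storage[] = {1, 1};
  TfLiteIntArray* inputs = reinterpret_cast<TfLiteIntArray*>(inputs_storage);
  TfLiteIntArray* outputs = reinterpret_cast<TfLiteIntArray*>(outputs_storage);

  tflite::MicroErrorReporter error_reporter;
  tflite::micro::KernelRunner runner(registration, tensors, /*tensors_size=*/2,
                                     inputs, outputs, /*builtin_data=*/nullptr,
                                     &error_reporter);
  if (runner.InitAndPrepare() != kTfLiteOk) {
    return kTfLiteError;
  }
  return runner.Invoke();
}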
@@ -0,0 +1,41 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/kernel_util.h"

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace micro {

bool HaveSameShapes(const TfLiteEvalTensor* input1,
                    const TfLiteEvalTensor* input2) {
  TFLITE_DCHECK(input1 != nullptr);
  TFLITE_DCHECK(input2 != nullptr);
  return TfLiteIntArrayEqual(input1->dims, input2->dims);
}

const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) {
  if (tensor == nullptr || tensor->dims == nullptr) {
    return RuntimeShape();
  }
  TfLiteIntArray* dims = tensor->dims;
  const int dims_size = dims->size;
  const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data);
  return RuntimeShape(dims_size, dims_data);
}

}  // namespace micro
}  // namespace tflite
75 code/lib/tfmicro/tensorflow/lite/micro/kernels/kernel_util.h Normal file
@@ -0,0 +1,75 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_

#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {
namespace micro {

// Returns a mutable tensor for a given input index. is_variable must be
// checked during prepare when the full TfLiteTensor is available.
inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
                                             const TfLiteNode* node,
                                             int index) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(node != nullptr);
  return context->GetEvalTensor(context, node->inputs->data[index]);
}

// Returns the TfLiteEvalTensor struct for a given input index in a node.
inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
                                            const TfLiteNode* node, int index) {
  return GetMutableEvalInput(context, node, index);
}

// Returns the TfLiteEvalTensor struct for a given output index in a node.
inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
                                       const TfLiteNode* node, int index) {
  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(node != nullptr);
  return context->GetEvalTensor(context, node->outputs->data[index]);
}

// Returns data for a TfLiteEvalTensor struct.
template <typename T>
T* GetTensorData(TfLiteEvalTensor* tensor) {
  return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
}

// Returns const data for a TfLiteEvalTensor struct.
template <typename T>
const T* GetTensorData(const TfLiteEvalTensor* tensor) {
  TFLITE_DCHECK(tensor != nullptr);
  return reinterpret_cast<const T*>(tensor->data.raw);
}

// Returns the shape of a TfLiteEvalTensor struct.
const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor);

// Returns true if the given tensors have the same shape.
bool HaveSameShapes(const TfLiteEvalTensor* input1,
                    const TfLiteEvalTensor* input2);

}  // namespace micro
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_
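Illustration, not part of the commit: a sketch of how a micro kernel's Eval function typically consumes the helpers declared above. The op itself (float negation) and all names here are hypothetical.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus NegEval(TfLiteContext* context, TfLiteNode* node) {
  // Lightweight TfLiteEvalTensor views, valid during Invoke.
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  // Element count comes from the shape helper declared in kernel_util.h.
  const int flat_size = tflite::micro::GetTensorShape(input).FlatSize();
  const float* in = tflite::micro::GetTensorData<float>(input);
  float* out = tflite::micro::GetTensorData<float>(output);
  for (int i = 0; i < flat_size; ++i) {
    out[i] = -in[i];
  }
  return kTfLiteOk;
}

}  // namespace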
@@ -14,16 +14,19 @@ limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace l2norm {
|
||||
|
||||
namespace {
|
||||
|
||||
// This file has two implementation of L2Norm.
|
||||
enum KernelType {
|
||||
kReference,
|
||||
@@ -33,44 +36,59 @@ enum KernelType {
|
||||
constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
} // namespace
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(DEBUG)
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* params = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
|
||||
L2NormalizationParams* data =
|
||||
static_cast<L2NormalizationParams*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
|
||||
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
|
||||
output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.));
|
||||
if (output->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128);
|
||||
}
|
||||
if (output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
}
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
} else if (output->type == kTfLiteFloat32) {
|
||||
data->input_zero_point = 0;
|
||||
}
|
||||
|
||||
// TODO(ahentz): For some reason our implementations don't support
|
||||
// activations.
|
||||
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
|
||||
#endif
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context,
|
||||
sizeof(L2NormalizationParams));
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const L2NormalizationParams& data =
|
||||
*(static_cast<const L2NormalizationParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// TODO(b/143912164): instead of hardcode the epsilon here, we should read it
|
||||
// from tensorflow, i.e., adding a params.
|
||||
@@ -87,39 +105,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
// So we don't even need to do handle the epsilon for quantized kernel case.
|
||||
const float epsilon = 1e-6f;
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
#define TF_LITE_L2NORM(type) \
|
||||
tflite::L2NormalizationParams op_params; \
|
||||
op_params.input_zero_point = 0; \
|
||||
type::L2Normalization(op_params, GetTensorShape(input), \
|
||||
GetTensorData<float>(input), GetTensorShape(output), \
|
||||
GetTensorData<float>(output), epsilon)
|
||||
|
||||
TF_LITE_L2NORM(reference_ops);
|
||||
#undef TF_LITE_L2NORM
|
||||
reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output),
|
||||
epsilon);
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
#define TF_LITE_L2NORM(type) \
|
||||
tflite::L2NormalizationParams op_params; \
|
||||
op_params.input_zero_point = input->params.zero_point; \
|
||||
type::L2Normalization(op_params, GetTensorShape(input), \
|
||||
GetTensorData<uint8>(input), GetTensorShape(output), \
|
||||
GetTensorData<uint8>(output))
|
||||
|
||||
TF_LITE_L2NORM(reference_ops);
|
||||
#undef TF_LITE_L2NORM
|
||||
reference_ops::L2Normalization(
|
||||
data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
const auto input_shape = GetTensorShape(input);
|
||||
const auto output_shape = GetTensorShape(output);
|
||||
const auto input_shape = tflite::micro::GetTensorShape(input);
|
||||
const auto output_shape = tflite::micro::GetTensorShape(output);
|
||||
const int trailing_dim = input_shape.DimensionsCount() - 1;
|
||||
const int depth =
|
||||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
|
||||
const int outer_size =
|
||||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
|
||||
reference_integer_ops::L2Normalization(input->params.zero_point, outer_size,
|
||||
depth, GetTensorData<int8>(input),
|
||||
GetTensorData<int8>(output));
|
||||
reference_integer_ops::L2Normalization(
|
||||
data.input_zero_point, outer_size, depth,
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float.",
|
||||
output->type);
|
||||
TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
@@ -128,22 +139,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace l2norm
|
||||
|
||||
TfLiteRegistration* Register_L2NORM_REF() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/l2norm::Prepare,
|
||||
/*invoke=*/l2norm::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
|
||||
return &r;
|
||||
TfLiteRegistration Register_L2NORM_REF() {
|
||||
return {/*init=*/l2norm::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/l2norm::Prepare,
|
||||
/*invoke=*/l2norm::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_L2_NORMALIZATION() {
|
||||
return Register_L2NORM_REF();
|
||||
}
|
||||
TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@@ -15,8 +15,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -31,20 +31,29 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
bool (*func)(bool, bool)) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor1);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor2);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
if (HaveSameShapes(input1, input2)) {
|
||||
if (tflite::micro::HaveSameShapes(input1, input2)) {
|
||||
reference_ops::BinaryFunction<bool, bool, bool>(
|
||||
GetTensorShape(input1), GetTensorData<bool>(input1),
|
||||
GetTensorShape(input2), GetTensorData<bool>(input2),
|
||||
GetTensorShape(output), GetTensorData<bool>(output), func);
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<bool>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<bool>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<bool>(output), func);
|
||||
} else {
|
||||
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
|
||||
GetTensorShape(input1), GetTensorData<bool>(input1),
|
||||
GetTensorShape(input2), GetTensorData<bool>(input2),
|
||||
GetTensorShape(output), GetTensorData<bool>(output), func);
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<bool>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<bool>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<bool>(output), func);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
@@ -65,32 +74,30 @@ TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace logical
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_OR() {
|
||||
TfLiteRegistration Register_LOGICAL_OR() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalOrEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalOrEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_LOGICAL_AND() {
|
||||
TfLiteRegistration Register_LOGICAL_AND() {
|
||||
// Init, Free, Prepare, Eval are satisfying the Interface required by
|
||||
// TfLiteRegistration.
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalAndEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/logical::LogicalAndEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -23,6 +23,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -42,9 +43,11 @@ struct OpData {
|
||||
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
OpData* data) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
if (input->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
std::numeric_limits<int8_t>::min());
|
||||
@@ -54,6 +57,8 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(1 << (31 - kInputIntegerBits));
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
|
||||
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
|
||||
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
|
||||
|
||||
@@ -64,18 +69,34 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
return CalculateArithmeticOpData(context, node, data);
|
||||
}
|
||||
|
||||
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
OpData data;
|
||||
CalculateArithmeticOpData(context, node, &data);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32: {
|
||||
reference_ops::Logistic(
|
||||
GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Logistic(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
@@ -88,10 +109,11 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8: {
|
||||
reference_integer_ops::Logistic(
|
||||
input->params.zero_point, data.input_range_radius,
|
||||
data.input_multiplier, data.input_left_shift,
|
||||
NumElements(input->dims), GetTensorData<int8_t>(input),
|
||||
GetTensorData<int8_t>(output));
|
||||
data->input_zero_point, data->input_range_radius,
|
||||
data->input_multiplier, data->input_left_shift,
|
||||
NumElements(input->dims),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
}
|
||||
default:
|
||||
@@ -113,16 +135,15 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_LOGISTIC() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/activations::LogisticEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_LOGISTIC() {
|
||||
return {/*init=*/activations::LogisticInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::LogisticPrepare,
|
||||
/*invoke=*/activations::LogisticEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -40,13 +41,13 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpContext {
  OpContext(TfLiteContext* context, TfLiteNode* node) {
    input1 = GetInput(context, node, kInputTensor1);
    input2 = GetInput(context, node, kInputTensor2);
    output = GetOutput(context, node, kOutputTensor);
    input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1);
    input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2);
    output = tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  }
  const TfLiteTensor* input1;
  const TfLiteTensor* input2;
  TfLiteTensor* output;
  const TfLiteEvalTensor* input1;
  const TfLiteEvalTensor* input2;
  TfLiteEvalTensor* output;
};
|
||||
struct MaximumOp {
|
||||
@@ -69,12 +70,12 @@ template <typename data_type, typename op_type>
|
||||
void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
|
||||
const OpContext& op_context) {
|
||||
reference_ops::MaximumMinimumBroadcastSlow(
|
||||
GetTensorShape(op_context.input1),
|
||||
GetTensorData<data_type>(op_context.input1),
|
||||
GetTensorShape(op_context.input2),
|
||||
GetTensorData<data_type>(op_context.input2),
|
||||
GetTensorShape(op_context.output),
|
||||
GetTensorData<data_type>(op_context.output),
|
||||
tflite::micro::GetTensorShape(op_context.input1),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.input1),
|
||||
tflite::micro::GetTensorShape(op_context.input2),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.input2),
|
||||
tflite::micro::GetTensorShape(op_context.output),
|
||||
tflite::micro::GetTensorData<data_type>(op_context.output),
|
||||
op_type::template op<data_type>);
|
||||
}
|
||||
|
||||
@@ -116,34 +117,30 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace maximum_minimum
|
||||
|
||||
TfLiteRegistration* Register_MAXIMUM() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MaximumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MAXIMUM() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MaximumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_MINIMUM() {
|
||||
static TfLiteRegistration r = {
|
||||
/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MinimumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MINIMUM() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/
|
||||
maximum_minimum::Eval<maximum_minimum::kReference,
|
||||
maximum_minimum::MinimumOp>,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro

@@ -17,10 +17,6 @@ limitations under the License.

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace ops {
namespace micro {

// Forward declaration of all micro op kernel registration methods. These
// registrations are included with the standard `BuiltinOpResolver`.
//
@@ -29,58 +25,73 @@ namespace micro {
// their model requires, using a custom `(Micro)MutableOpResolver`. Selective
// registration in turn allows the linker to strip unused kernels.

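// Illustrative sketch of the selective registration described above (not part
// of the original header). It assumes the templated MicroMutableOpResolver
// available in this snapshot; the exact Add* method names can differ between
// TFLM versions:
//
//   #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
//
//   // Register only the three kernels this hypothetical model needs; every
//   // other Register_*() implementation can then be stripped by the linker.
//   tflite::MicroMutableOpResolver<3> resolver;
//   resolver.AddConv2D();
//   resolver.AddSoftmax();
//   resolver.AddQuantize();
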
TfLiteRegistration* Register_ABS();
|
||||
TfLiteRegistration* Register_ADD();
|
||||
TfLiteRegistration* Register_ARG_MAX();
|
||||
TfLiteRegistration* Register_ARG_MIN();
|
||||
TfLiteRegistration* Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration* Register_CEIL();
|
||||
namespace tflite {

// TFLM is incrementally moving towards a flat tflite namespace
// (https://abseil.io/tips/130). Any new ops (or cleanup of existing ops)
// should have their Register function declarations in the tflite namespace.

TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SVDF();

namespace ops {
namespace micro {

TfLiteRegistration Register_ABS();
|
||||
TfLiteRegistration Register_ADD();
|
||||
TfLiteRegistration Register_ARG_MAX();
|
||||
TfLiteRegistration Register_ARG_MIN();
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D();
|
||||
TfLiteRegistration Register_CEIL();
|
||||
// TODO(b/160234179): Change custom OPs to also return by value.
|
||||
TfLiteRegistration* Register_CIRCULAR_BUFFER();
|
||||
TfLiteRegistration* Register_CONV_2D();
|
||||
TfLiteRegistration* Register_CONCATENATION();
|
||||
TfLiteRegistration* Register_COS();
|
||||
TfLiteRegistration* Register_DEPTHWISE_CONV_2D();
|
||||
TfLiteRegistration* Register_DEQUANTIZE();
|
||||
TfLiteRegistration* Register_EQUAL();
|
||||
TfLiteRegistration* Register_FLOOR();
|
||||
TfLiteRegistration* Register_FULLY_CONNECTED();
|
||||
TfLiteRegistration* Register_GREATER();
|
||||
TfLiteRegistration* Register_GREATER_EQUAL();
|
||||
TfLiteRegistration* Register_LESS();
|
||||
TfLiteRegistration* Register_LESS_EQUAL();
|
||||
TfLiteRegistration* Register_LOG();
|
||||
TfLiteRegistration* Register_LOGICAL_AND();
|
||||
TfLiteRegistration* Register_LOGICAL_NOT();
|
||||
TfLiteRegistration* Register_LOGICAL_OR();
|
||||
TfLiteRegistration* Register_LOGISTIC();
|
||||
TfLiteRegistration* Register_MAXIMUM();
|
||||
TfLiteRegistration* Register_MAX_POOL_2D();
|
||||
TfLiteRegistration* Register_MEAN();
|
||||
TfLiteRegistration* Register_MINIMUM();
|
||||
TfLiteRegistration* Register_MUL();
|
||||
TfLiteRegistration* Register_NEG();
|
||||
TfLiteRegistration* Register_NOT_EQUAL();
|
||||
TfLiteRegistration* Register_PACK();
|
||||
TfLiteRegistration* Register_PAD();
|
||||
TfLiteRegistration* Register_PADV2();
|
||||
TfLiteRegistration* Register_PRELU();
|
||||
TfLiteRegistration* Register_QUANTIZE();
|
||||
TfLiteRegistration* Register_RELU();
|
||||
TfLiteRegistration* Register_RELU6();
|
||||
TfLiteRegistration* Register_RESHAPE();
|
||||
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR();
|
||||
TfLiteRegistration* Register_ROUND();
|
||||
TfLiteRegistration* Register_RSQRT();
|
||||
TfLiteRegistration* Register_SIN();
|
||||
TfLiteRegistration* Register_SOFTMAX();
|
||||
TfLiteRegistration* Register_SPLIT();
|
||||
TfLiteRegistration* Register_SQRT();
|
||||
TfLiteRegistration* Register_SQUARE();
|
||||
TfLiteRegistration* Register_STRIDED_SLICE();
|
||||
TfLiteRegistration* Register_SUB();
|
||||
TfLiteRegistration* Register_SVDF();
|
||||
TfLiteRegistration* Register_UNPACK();
|
||||
TfLiteRegistration* Register_L2_NORMALIZATION();
|
||||
TfLiteRegistration Register_CONCATENATION();
|
||||
TfLiteRegistration Register_COS();
|
||||
TfLiteRegistration Register_DEQUANTIZE();
|
||||
TfLiteRegistration Register_EQUAL();
|
||||
TfLiteRegistration Register_FLOOR();
|
||||
TfLiteRegistration Register_GREATER();
|
||||
TfLiteRegistration Register_GREATER_EQUAL();
|
||||
TfLiteRegistration Register_HARD_SWISH();
|
||||
TfLiteRegistration Register_LESS();
|
||||
TfLiteRegistration Register_LESS_EQUAL();
|
||||
TfLiteRegistration Register_LOG();
|
||||
TfLiteRegistration Register_LOGICAL_AND();
|
||||
TfLiteRegistration Register_LOGICAL_NOT();
|
||||
TfLiteRegistration Register_LOGICAL_OR();
|
||||
TfLiteRegistration Register_LOGISTIC();
|
||||
TfLiteRegistration Register_MAXIMUM();
|
||||
TfLiteRegistration Register_MAX_POOL_2D();
|
||||
TfLiteRegistration Register_MEAN();
|
||||
TfLiteRegistration Register_MINIMUM();
|
||||
TfLiteRegistration Register_MUL();
|
||||
TfLiteRegistration Register_NEG();
|
||||
TfLiteRegistration Register_NOT_EQUAL();
|
||||
TfLiteRegistration Register_PACK();
|
||||
TfLiteRegistration Register_PAD();
|
||||
TfLiteRegistration Register_PADV2();
|
||||
TfLiteRegistration Register_PRELU();
|
||||
TfLiteRegistration Register_REDUCE_MAX();
|
||||
TfLiteRegistration Register_RELU();
|
||||
TfLiteRegistration Register_RELU6();
|
||||
TfLiteRegistration Register_RESHAPE();
|
||||
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
|
||||
TfLiteRegistration Register_ROUND();
|
||||
TfLiteRegistration Register_RSQRT();
|
||||
TfLiteRegistration Register_SIN();
|
||||
TfLiteRegistration Register_SPLIT();
|
||||
TfLiteRegistration Register_SPLIT_V();
|
||||
TfLiteRegistration Register_SQRT();
|
||||
TfLiteRegistration Register_SQUARE();
|
||||
TfLiteRegistration Register_STRIDED_SLICE();
|
||||
TfLiteRegistration Register_SUB();
|
||||
TfLiteRegistration Register_UNPACK();
|
||||
TfLiteRegistration Register_L2_NORMALIZATION();
|
||||
TfLiteRegistration Register_TANH();
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
|
||||
@@ -21,132 +21,194 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace mul {
|
||||
namespace {
|
||||
|
||||
constexpr int kInput1Tensor = 0;
|
||||
constexpr int kInput2Tensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
int32_t input1_zero_point;
|
||||
int32_t input2_zero_point;
|
||||
|
||||
int32_t output_activation_min;
|
||||
int32_t output_activation_max;
|
||||
|
||||
int32_t output_zero_point;
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
float output_activation_min_f32;
|
||||
float output_activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data) {
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||
TF_LITE_ENSURE(context, input1 != nullptr);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||
TF_LITE_ENSURE(context, input2 != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input1->type, input2->type);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
|
||||
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
|
||||
context, params->activation, output, &data->output_activation_min,
|
||||
&data->output_activation_max));
|
||||
|
||||
    double real_multiplier = static_cast<double>(input1->params.scale) *
                             static_cast<double>(input2->params.scale) /
                             static_cast<double>(output->params.scale);
    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                       &data->output_shift);

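    // The three float scales are folded into one fixed-point multiplier/shift
    // pair. Illustrative numbers (assumed, not taken from any model):
    //   s1 = 0.02, s2 = 0.05, s_out = 0.1  =>  real_multiplier = 0.01
    // which QuantizeMultiplier() encodes as roughly
    //   output_multiplier = 1374389535 (Q31), output_shift = -6
    // since (1374389535 / 2^31) * 2^-6 ~= 0.01. At Eval time the
    // zero-point-adjusted product (q1 - zp1) * (q2 - zp2) is rescaled with
    // this pair, e.g. via MultiplyByQuantizedMultiplier().
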
data->input1_zero_point = input1->params.zero_point;
|
||||
data->input2_zero_point = input2->params.zero_point;
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
} else {
|
||||
CalculateActivationRange(params->activation,
|
||||
&data->output_activation_min_f32,
|
||||
&data->output_activation_max_f32);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data,
|
||||
const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
op_params.input1_offset = -input1->params.zero_point;
|
||||
op_params.input2_offset = -input2->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
} // namespace
|
||||
|
||||
#define TF_LITE_MUL(type, opname, dtype) \
|
||||
type::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<dtype>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<dtype>(input2), GetTensorShape(output), \
|
||||
GetTensorData<dtype>(output));
|
||||
void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
|
||||
const TfLiteEvalTensor* input1,
|
||||
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params = {};
|
||||
op_params.quantized_activation_min = data->output_activation_min;
|
||||
op_params.quantized_activation_max = data->output_activation_max;
|
||||
op_params.float_activation_max = data->output_activation_max_f32;
|
||||
op_params.input1_offset = -data->input1_zero_point;
|
||||
op_params.input2_offset = -data->input2_zero_point;
|
||||
op_params.output_offset = data->output_zero_point;
|
||||
op_params.output_multiplier = data->output_multiplier;
|
||||
op_params.output_shift = data->output_shift;
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
|
||||
} else {
|
||||
TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
|
||||
} else {
|
||||
TF_LITE_MUL(reference_ops, Mul, uint8_t);
|
||||
}
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (output->type == kTfLiteInt8) {
|
||||
if (need_broadcast) {
|
||||
reference_integer_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::Mul(op_params,
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<int8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<int8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (need_broadcast) {
|
||||
reference_integer_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::Mul(op_params,
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<uint8_t>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<uint8_t>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
#undef TF_LITE_MUL
|
||||
}
|
||||
}
|
||||
|
||||
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteMulParams* params, OpData* data,
|
||||
const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
float output_activation_min, output_activation_max;
|
||||
CalculateActivationRange(params->activation, &output_activation_min,
|
||||
&output_activation_max);
|
||||
tflite::ArithmeticParams op_params;
|
||||
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||
TfLiteMulParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
|
||||
TfLiteEvalTensor* output) {
|
||||
tflite::ArithmeticParams op_params = {};
|
||||
op_params.float_activation_min = data->output_activation_min_f32;
|
||||
op_params.float_activation_max = data->output_activation_max_f32;
|
||||
|
||||
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
#define TF_LITE_MUL(opname) \
|
||||
reference_ops::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<float>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<float>(input2), GetTensorShape(output), \
|
||||
GetTensorData<float>(output));
|
||||
tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorShape(input2), &op_params);
|
||||
|
||||
if (need_broadcast) {
|
||||
TF_LITE_MUL(BroadcastMul4DSlow);
|
||||
reference_ops::BroadcastMul4DSlow(
|
||||
op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_MUL(Mul);
|
||||
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
|
||||
tflite::micro::GetTensorData<float>(input1),
|
||||
tflite::micro::GetTensorShape(input2),
|
||||
tflite::micro::GetTensorData<float>(input2),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
#undef TF_LITE_MUL
|
||||
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = static_cast<OpData*>(node->user_data);

  return CalculateOpData(context, node, params, data);
}

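// Kernel lifecycle note: Init() only reserves a persistent OpData block from
// the arena via AllocatePersistentBuffer(), Prepare() fills it with the
// quantization parameters computed in CalculateOpData(), and Eval() reads the
// precomputed values back through node->user_data, so nothing is recomputed
// (and nothing is heap-allocated) on the per-inference path.
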
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
|
||||
OpData data;
|
||||
|
||||
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
CalculateOpData(context, node, params, &data);
|
||||
const TfLiteEvalTensor* input1 =
|
||||
tflite::micro::GetEvalInput(context, node, kInput1Tensor);
|
||||
const TfLiteEvalTensor* input2 =
|
||||
tflite::micro::GetEvalInput(context, node, kInput2Tensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input1->type) {
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
EvalQuantized(context, node, params, &data, input1, input2, output);
|
||||
EvalQuantized(context, node, data, input1, input2, output);
|
||||
break;
|
||||
case kTfLiteFloat32:
|
||||
EvalFloat(context, node, params, &data, input1, input2, output);
|
||||
EvalFloat(context, node, params, data, input1, input2, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -158,16 +220,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
} // namespace mul
|
||||
|
||||
TfLiteRegistration* Register_MUL() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/mul::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MUL() {
|
||||
return {/*init=*/mul::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/mul::Prepare,
|
||||
/*invoke=*/mul::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -17,7 +17,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -28,14 +28,17 @@ constexpr int kInputTensor = 0;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
switch (input->type) {
|
||||
// TODO(wangtz): handle for kTfLiteInt8
|
||||
case kTfLiteFloat32:
|
||||
reference_ops::Negate(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
reference_ops::Negate(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||
@@ -47,16 +50,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace neg
|
||||
|
||||
TfLiteRegistration* Register_NEG() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/neg::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_NEG() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
          /*invoke=*/neg::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}
|
||||
} // namespace micro
|
||||
|
||||
@@ -16,7 +16,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -28,9 +28,11 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
template <typename T>
|
||||
TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLiteTensor* output, int values_count, int axis) {
|
||||
TfLiteEvalTensor* output, int values_count, int axis) {
|
||||
const TfLiteEvalTensor* input0 =
|
||||
tflite::micro::GetEvalInput(context, node, 0);
|
||||
|
||||
const int dimensions = output->dims->size;
|
||||
const TfLiteTensor* input0 = GetInput(context, node, 0);
|
||||
const TfLiteIntArray* input_dims = input0->dims;
|
||||
const TfLiteIntArray* output_dims = output->dims;
|
||||
|
||||
@@ -52,11 +54,11 @@ TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
|
||||
}
|
||||
TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);
|
||||
|
||||
T* output_data = GetTensorData<T>(output);
|
||||
T* output_data = tflite::micro::GetTensorData<T>(output);
|
||||
|
||||
for (int i = 0; i < values_count; ++i) {
|
||||
const TfLiteTensor* t = GetInput(context, node, i);
|
||||
const T* input_data = GetTensorData<T>(t);
|
||||
const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i);
|
||||
const T* input_data = tflite::micro::GetTensorData<T>(t);
|
||||
for (int k = 0; k < outer_size; ++k) {
|
||||
const T* input_ptr = input_data + copy_size * k;
|
||||
int loc = k * values_count * copy_size + i * copy_size;
|
||||
@@ -72,7 +74,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLitePackParams* data =
|
||||
reinterpret_cast<TfLitePackParams*>(node->builtin_data);
|
||||
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (output->type) {
|
||||
case kTfLiteFloat32: {
|
||||
@@ -108,16 +111,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
} // namespace
|
||||
} // namespace pack
|
||||
|
||||
TfLiteRegistration* Register_PACK() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PACK() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pack::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -16,189 +16,208 @@ limitations under the License.
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
#ifdef MEMORY_SANITIZER
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#else
|
||||
#define __msan_check_mem_is_initialized(ptr, size)
|
||||
#endif
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/portable_tensor.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace pad {
|
||||
namespace {
|
||||
|
||||
struct PadContext {
|
||||
PadContext(TfLiteContext* context, TfLiteNode* node) {
|
||||
input = GetInput(context, node, 0);
|
||||
paddings = GetInput(context, node, 1);
|
||||
constant_values = nullptr;
|
||||
if (NumInputs(node) == 3) {
|
||||
constant_values = GetOptionalInputTensor(context, node, 2);
|
||||
} else {
|
||||
constant_values = nullptr;
|
||||
}
|
||||
output = GetOutput(context, node, 0);
|
||||
    dims = NumDimensions(input);

    resizing_category = ResizingCategory::kGenericResize;
    const int paddings_total = GetTensorShape(paddings).FlatSize();
    const int32* paddings_data = GetTensorData<int32>(paddings);
    // Paddings will be an n x 2 array, and we need to detect 4D arrays with
    // the pattern { {0,0}, {a, b}, {c, d}, {0,0} }.
    if (IsConstantTensor(paddings) && paddings_total == 8 &&
        (paddings_data[0] == 0 && paddings_data[1] == 0) &&
        (paddings_data[6] == 0 && paddings_data[7] == 0)) {
      resizing_category = ResizingCategory::kImageStyle;
    }
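    // Example with illustrative values: for an NHWC input,
    //   paddings = { {0,0}, {2,2}, {1,1}, {0,0} }
    // flattens to paddings_data = {0,0, 2,2, 1,1, 0,0}. Only the spatial H/W
    // dimensions are padded, so the check above selects the faster
    // kImageStyle path; any batch or channel padding falls back to
    // kGenericResize.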
}
|
||||
const TfLiteTensor* constant_values;
|
||||
const TfLiteTensor* input;
|
||||
const TfLiteTensor* paddings;
|
||||
TfLiteTensor* output;
|
||||
int dims;
|
||||
ResizingCategory resizing_category;
|
||||
struct OpData {
|
||||
PadParams params;
|
||||
int32_t output_zero_point;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
PadContext op_context(context, node);
|
||||
TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type);
|
||||
if (op_context.constant_values != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, op_context.input->type,
|
||||
op_context.constant_values->type);
|
||||
const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
|
||||
TF_LITE_ENSURE(context, paddings != nullptr);
|
||||
const TfLiteTensor* constant_values =
|
||||
NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
|
||||
TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
|
||||
// Current implementations rely on the inputs being <= 4D.
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) <=
|
||||
reference_ops::PadKernelMaxDimensionCount());
|
||||
|
||||
if (constant_values != nullptr) {
|
||||
TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
|
||||
// Ensure that constant_values is a scalar.
|
||||
TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
|
||||
}
|
||||
|
||||
// There must be a pair of paddings for each output dimension.
|
||||
TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(),
|
||||
op_context.output->dims->size * 2);
|
||||
TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
|
||||
output->dims->size * 2);
|
||||
|
||||
// On Micro, outputs must be properly sized by the converter.
|
||||
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
|
||||
for (int i = 0; i < op_context.output->dims->size; i++) {
|
||||
int output_dim = op_context.output->dims->data[i];
|
||||
int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] +
|
||||
paddings_data[i * 2 + 1];
|
||||
// NOTE: This data is only available because the paddings buffer is stored in
|
||||
// the flatbuffer:
|
||||
TF_LITE_ENSURE(context, IsConstantTensor(paddings));
|
||||
const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
|
||||
for (int i = 0; i < output->dims->size; i++) {
|
||||
int output_dim = output->dims->data[i];
|
||||
int expected_dim =
|
||||
input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
|
||||
TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
|
||||
}
|
||||
|
||||
// Current implementations rely on the inputs being <= 4D.
|
||||
TF_LITE_ENSURE(
|
||||
context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount());
|
||||
TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings));
|
||||
// Calculate OpData:
|
||||
data->params.resizing_category = ResizingCategory::kGenericResize;
|
||||
const int paddings_total = GetTensorShape(paddings).FlatSize();
|
||||
if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
|
||||
(paddings_data[6] == 0 && paddings_data[7] == 0)) {
|
||||
data->params.resizing_category = ResizingCategory::kImageStyle;
|
||||
  }

  const int num_input_dimensions = NumDimensions(input);
  data->params.left_padding_count = num_input_dimensions;
  data->params.right_padding_count = num_input_dimensions;

  for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
    data->params.left_padding[idx] = paddings_data[idx * 2];
    data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
  }

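  // Example with illustrative values: paddings_data = {0,0, 2,2, 1,1, 0,0}
  // yields left_padding = {0, 2, 1, 0} and right_padding = {0, 2, 1, 0},
  // i.e. entry [i * 2] pads before dimension i and [i * 2 + 1] pads after it.
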
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
if (constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE(context, output->params.zero_point >=
|
||||
std::numeric_limits<uint8_t>::min());
|
||||
TF_LITE_ENSURE(context, output->params.zero_point <=
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
} else {
|
||||
TF_LITE_ENSURE(context, output->params.zero_point >=
|
||||
std::numeric_limits<int8_t>::min());
|
||||
TF_LITE_ENSURE(context, output->params.zero_point <=
|
||||
std::numeric_limits<int8_t>::max());
|
||||
}
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
|
||||
constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
|
||||
static_cast<double>(constant_values->params.scale));
|
||||
}
|
||||
data->output_zero_point = output->params.zero_point;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
PadContext op_context(context, node);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
if (op_context.constant_values != nullptr) {
|
||||
// Ensure that constant_values is a scalar.
|
||||
TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1);
|
||||
}
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, /*index=*/0);
|
||||
const TfLiteEvalTensor* constant_values =
|
||||
NumInputs(node) == 3
|
||||
? tflite::micro::GetEvalInput(context, node, /*index=*/2)
|
||||
: nullptr;
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, /*index=*/0);
|
||||
|
||||
// Create before and after padding arrays that are accepted by the kernel.
|
||||
const int32* paddings_data = GetTensorData<int32>(op_context.paddings);
|
||||
|
||||
tflite::PadParams op_params;
|
||||
memset(&op_params, 0, sizeof(PadParams));
|
||||
op_params.left_padding_count = op_context.dims;
|
||||
op_params.right_padding_count = op_context.dims;
|
||||
|
||||
for (int idx = op_context.dims - 1; idx >= 0; --idx) {
|
||||
op_params.left_padding[idx] = paddings_data[idx * 2];
|
||||
op_params.right_padding[idx] = paddings_data[idx * 2 + 1];
|
||||
}
|
||||
|
||||
#define TF_LITE_PAD(type, op_name, scalar, pad_value) \
|
||||
const scalar pad_value_copy = pad_value; \
|
||||
\
|
||||
type::op_name(op_params, GetTensorShape(op_context.input), \
|
||||
GetTensorData<scalar>(op_context.input), &pad_value_copy, \
|
||||
GetTensorShape(op_context.output), \
|
||||
GetTensorData<scalar>(op_context.output))
|
||||
switch (op_context.input->type) {
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
float pad_value = op_context.constant_values == nullptr
|
||||
? 0.f
|
||||
: *GetTensorData<float>(op_context.constant_values);
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value);
|
||||
float pad_value =
|
||||
constant_values == nullptr
|
||||
? 0.f
|
||||
: *tflite::micro::GetTensorData<float>(constant_values);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, float, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
uint8_t pad_value;
|
||||
if (op_context.constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
|
||||
std::numeric_limits<uint8_t>::min());
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
|
||||
std::numeric_limits<uint8_t>::max());
|
||||
pad_value = static_cast<uint8_t>(op_context.output->params.zero_point);
|
||||
if (constant_values == nullptr) {
|
||||
pad_value = static_cast<uint8_t>(data->output_zero_point);
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
|
||||
op_context.constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context, static_cast<double>(op_context.output->params.scale),
|
||||
static_cast<double>(op_context.constant_values->params.scale));
|
||||
pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
|
||||
pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
|
||||
}
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
int8_t pad_value;
|
||||
if (op_context.constant_values == nullptr) {
|
||||
// Quantized Pad requires that 0 is represented in the quantized
|
||||
// range.
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
|
||||
std::numeric_limits<int8_t>::min());
|
||||
TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
|
||||
std::numeric_limits<int8_t>::max());
|
||||
pad_value = static_cast<int8_t>(op_context.output->params.zero_point);
|
||||
      if (constant_values == nullptr) {
        pad_value = static_cast<int8_t>(data->output_zero_point);
|
||||
} else {
|
||||
// Quantized Pad requires that 'constant_values' is represented in the
|
||||
// same quantized range as the input and output tensors.
|
||||
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
|
||||
op_context.constant_values->params.zero_point);
|
||||
TF_LITE_ENSURE(context, op_context.output->params.scale ==
|
||||
op_context.constant_values->params.scale);
|
||||
pad_value = *GetTensorData<int8_t>(op_context.constant_values);
|
||||
pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
|
||||
}
|
||||
if (op_context.resizing_category == ResizingCategory::kImageStyle) {
|
||||
TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value);
|
||||
if (data->params.resizing_category == ResizingCategory::kImageStyle) {
|
||||
reference_ops::PadImageStyle(
|
||||
data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input), &pad_value,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt32: {
|
||||
int32_t pad_value =
|
||||
op_context.constant_values == nullptr
|
||||
constant_values == nullptr
|
||||
? 0
|
||||
: *GetTensorData<int32_t>(op_context.constant_values);
|
||||
TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value);
|
||||
: *tflite::micro::GetTensorData<int32_t>(constant_values);
|
||||
reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int32_t>(input),
|
||||
&pad_value, tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
} break;
|
||||
default:
|
||||
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
|
||||
TfLiteTypeGetName(op_context.input->type));
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
#undef TF_LITE_PAD
|
||||
@@ -207,29 +226,27 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace pad
|
||||
|
||||
TfLiteRegistration* Register_PAD() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PAD() {
|
||||
return {/*init=*/pad::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
// Also register Pad as PadV2.
|
||||
TfLiteRegistration* Register_PADV2() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PADV2() {
|
||||
return {/*init=*/pad::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pad::Prepare,
|
||||
/*invoke=*/pad::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -19,6 +19,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -32,6 +33,10 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
struct OpData {
|
||||
TfLitePaddingValues padding;
|
||||
int32_t activation_min;
|
||||
int32_t activation_max;
|
||||
float activation_min_f32;
|
||||
float activation_max_f32;
|
||||
};
|
||||
|
||||
TfLiteStatus CalculateOpData(const TfLiteContext* context,
|
||||
@@ -55,11 +60,7 @@ TfLiteStatus CalculateOpData(const TfLiteContext* context,
|
||||
|
||||
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params->activation, &activation_min,
|
||||
&activation_max);
|
||||
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@@ -67,20 +68,19 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.float_activation_min = activation_min;
|
||||
op_params.float_activation_max = activation_max;
|
||||
reference_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
op_params.float_activation_min = data->activation_min_f32;
|
||||
op_params.float_activation_max = data->activation_max_f32;
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
|
||||
const TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
const TfLiteEvalTensor* input,
|
||||
TfLiteEvalTensor* output) {
|
||||
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
|
||||
int32_t activation_min, activation_max;
|
||||
(void)CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&activation_min, &activation_max);
|
||||
|
||||
PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
@@ -89,27 +89,26 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = activation_min;
|
||||
op_params.quantized_activation_max = activation_max;
|
||||
op_params.quantized_activation_min = data->activation_min;
|
||||
op_params.quantized_activation_max = data->activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::AveragePool(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
|
||||
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
float activation_min, activation_max;
|
||||
CalculateActivationRange(params->activation, &activation_min,
|
||||
&activation_max);
|
||||
|
||||
TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@@ -117,22 +116,17 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.float_activation_min = activation_min;
|
||||
op_params.float_activation_max = activation_max;
|
||||
reference_ops::MaxPool(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
op_params.float_activation_min = data->activation_min_f32;
|
||||
op_params.float_activation_max = data->activation_max_f32;
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
TfLitePoolParams* params, OpData* data,
|
||||
const TfLiteTensor* input, TfLiteTensor* output) {
|
||||
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
|
||||
|
||||
int32_t activation_min, activation_max;
|
||||
(void)CalculateActivationRangeQuantized(context, params->activation, output,
|
||||
&activation_min, &activation_max);
|
||||
|
||||
TfLitePoolParams* params, const OpData* data,
|
||||
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
|
||||
tflite::PoolParams op_params;
|
||||
op_params.stride_height = params->stride_height;
|
||||
op_params.stride_width = params->stride_width;
|
||||
@@ -140,39 +134,44 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
op_params.filter_width = params->filter_width;
|
||||
op_params.padding_values.height = data->padding.height;
|
||||
op_params.padding_values.width = data->padding.width;
|
||||
op_params.quantized_activation_min = activation_min;
|
||||
op_params.quantized_activation_max = activation_max;
|
||||
op_params.quantized_activation_min = data->activation_min;
|
||||
op_params.quantized_activation_max = data->activation_max;
|
||||
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
reference_ops::MaxPool(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
reference_integer_ops::MaxPool(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
||||
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
OpData data;
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// Inputs and outputs share the same type, guaranteed by the converter.
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
AverageEvalFloat(context, node, params, &data, input, output);
|
||||
AverageEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
AverageEvalQuantized(context, node, params, &data, input, output);
|
||||
AverageEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
|
||||
@@ -183,21 +182,24 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
OpData data;
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const OpData* data = static_cast<const OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
MaxEvalFloat(context, node, params, &data, input, output);
|
||||
MaxEvalFloat(context, node, params, data, input, output);
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt8:
|
||||
MaxEvalQuantized(context, node, params, &data, input, output);
|
||||
MaxEvalQuantized(context, node, params, data, input, output);
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
|
||||
@@ -207,30 +209,59 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pooling
|
||||
|
||||
TfLiteRegistration* Register_AVERAGE_POOL_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pooling::AverageEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteRegistration* Register_MAX_POOL_2D() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/nullptr,
|
||||
/*invoke=*/pooling::MaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));

  if (input->type == kTfLiteFloat32) {
    CalculateActivationRange(params->activation, &data->activation_min_f32,
                             &data->activation_max_f32);
  } else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
    CalculateActivationRangeQuantized(context, params->activation, output,
                                      &data->activation_min,
                                      &data->activation_max);
  }

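  // Example (illustrative numbers): for kTfLiteActRelu6 the float range above
  // is [0.0f, 6.0f]. For an int8 output with scale 0.1 and zero_point -128 the
  // same clamp is expressed in the quantized domain as [-128, -68], since
  // round(6 / 0.1) + (-128) = -68 and the lower bound saturates at -128.
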
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace pooling
|
||||
|
||||
TfLiteRegistration Register_AVERAGE_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::Prepare,
|
||||
/*invoke=*/pooling::AverageEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_MAX_POOL_2D() {
|
||||
return {/*init=*/pooling::Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/pooling::Prepare,
|
||||
/*invoke=*/pooling::MaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -15,20 +15,45 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
|
||||
const TfLiteTensor* alpha,
|
||||
TfLiteTensor* output, PreluParams* params) {
|
||||
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt16) {
|
||||
double real_multiplier_1 = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
double real_multiplier_2 = static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(alpha->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier_1, ¶ms->output_multiplier_1,
|
||||
¶ms->output_shift_1);
|
||||
QuantizeMultiplier(real_multiplier_2, ¶ms->output_multiplier_2,
|
||||
¶ms->output_shift_2);
|
||||
|
||||
params->input_offset = -input->params.zero_point;
|
||||
params->alpha_offset = -alpha->params.zero_point;
|
||||
params->output_offset = output->params.zero_point;
|
||||
}
|
||||
|
||||
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
inline void BroadcastPrelu4DSlowFloat(
|
||||
const RuntimeShape& unextended_input1_shape, const float* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape, const float* input2_data,
|
||||
@@ -60,43 +85,67 @@ inline void BroadcastPrelu4DSlowFloat(
|
||||
}
|
||||
}
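// Elementwise form of what BroadcastPrelu4DSlowFloat computes above (sketch
// only; the real function additionally handles shape broadcasting):
//   out = x          for x >= 0
//   out = alpha * x  for x <  0
inline float PreluOne(float x, float alpha) { return x >= 0.0f ? x : alpha * x; }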
|
||||
|
||||
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
|
||||
}
|
||||
|
||||
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
PreluParams* params = static_cast<PreluParams*>(node->user_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
const TfLiteTensor* alpha = GetInput(context, node, 1);
|
||||
TF_LITE_ENSURE(context, alpha != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
int32_t output_multiplier = 0;
|
||||
int output_shift = 0;
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
|
||||
double real_multiplier = static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(alpha->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
|
||||
&output_shift);
|
||||
}
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
return CalculatePreluParams(input, alpha, output, params);
|
||||
}
|
||||
|
||||
TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
const PreluParams& params =
|
||||
*(static_cast<const PreluParams*>(node->user_data));
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
BroadcastPrelu4DSlowFloat(
|
||||
GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(alpha), GetTensorData<float>(alpha),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<float>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
PreluParams op_params;
|
||||
op_params.input_offset = -input->params.zero_point;
|
||||
op_params.alpha_offset = -alpha->params.zero_point;
|
||||
op_params.output_offset = output->params.zero_point;
|
||||
op_params.output_multiplier = output_multiplier;
|
||||
op_params.output_shift = output_shift;
|
||||
reference_ops::BroadcastPrelu4DSlow(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(alpha), GetTensorData<uint8_t>(alpha),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<uint8_t>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
reference_ops::BroadcastPrelu4DSlow(
|
||||
params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(alpha),
|
||||
tflite::micro::GetTensorData<int8_t>(alpha),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
return kTfLiteOk;
|
||||
} break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(
|
||||
context, "Only float32 and uint8 are supported currently, got %d.",
|
||||
context, "Only float32 and uint8_t are supported currently, got %d.",
|
||||
TfLiteTypeGetName(input->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
@@ -104,16 +153,15 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace activations
|
||||
|
||||
TfLiteRegistration* Register_PRELU() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::PreluPrepare,
|
||||
/*invoke=*/activations::PreluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_PRELU() {
|
||||
return {/*init=*/activations::PreluInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::PreluPrepare,
|
||||
/*invoke=*/activations::PreluEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -19,19 +19,38 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace quantize {
|
||||
namespace {
|
||||
|
||||
struct OpData {
|
||||
tflite::QuantizationParams quantization_params;
|
||||
// The scaling factor from input to output (aka the 'real multiplier') can
|
||||
// be represented as a fixed point multiplier plus a left shift.
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
|
||||
int32_t input_zero_point;
|
||||
};
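// Sketch of the decomposition described in the comment above: a real
// multiplier is expressed as a Q31 fixed-point multiplier plus a power-of-two
// shift. DecomposeMultiplier is a hypothetical stand-in illustrating the
// behaviour of QuantizeMultiplier(); it is not part of this diff.
#include <cmath>
#include <cstdint>

void DecomposeMultiplier(double real_multiplier, int32_t* quantized_multiplier,
                         int* shift) {
  if (real_multiplier == 0.0) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  // real_multiplier == q * 2^shift with q in [0.5, 1).
  const double q = std::frexp(real_multiplier, shift);
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) {  // Rounding pushed q up to 1.0: renormalize.
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}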
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
|
||||
// Currently this only support affine per-layer quantization.
|
||||
@@ -43,34 +62,61 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale);
|
||||
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
|
||||
|
||||
TF_LITE_ENSURE(context,
|
||||
input->type == kTfLiteFloat32 || input->type == kTfLiteInt16);
|
||||
TF_LITE_ENSURE(context,
|
||||
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
|
||||
input->type == kTfLiteInt16 ||
|
||||
input->type == kTfLiteInt8);
|
||||
TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
|
||||
output->type == kTfLiteInt8 ||
|
||||
output->type == kTfLiteInt16 ||
|
||||
output->type == kTfLiteInt32);
|
||||
|
||||
if (((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) &&
|
||||
output->type == kTfLiteInt8) ||
|
||||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) {
|
||||
double effective_scale = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
|
||||
QuantizeMultiplier(effective_scale, &data->output_multiplier,
|
||||
&data->output_shift);
|
||||
}
|
||||
|
||||
data->quantization_params.zero_point = output->params.zero_point;
|
||||
data->quantization_params.scale = static_cast<double>(output->params.scale);
|
||||
|
||||
data->input_zero_point = input->params.zero_point;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
tflite::QuantizationParams op_params;
|
||||
op_params.zero_point = output->params.zero_point;
|
||||
op_params.scale = static_cast<double>(output->params.scale);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (input->type == kTfLiteFloat32) {
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteUInt8:
|
||||
reference_ops::AffineQuantize(
|
||||
op_params, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::AffineQuantize(
|
||||
data->quantization_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
@@ -79,17 +125,45 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
} else if (input->type == kTfLiteInt16) {
|
||||
size_t size = ElementCount(*input->dims);
|
||||
int32_t output_multiplier;
|
||||
int output_shift;
|
||||
double effective_scale =
|
||||
static_cast<double>(input->params.scale / output->params.scale);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
QuantizeMultiplier(effective_scale, &output_multiplier, &output_shift);
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
case kTfLiteInt16:
|
||||
reference_ops::Requantize(
|
||||
GetTensorData<int16_t>(input), size, output_multiplier,
|
||||
output_shift, input->params.zero_point, output->params.zero_point,
|
||||
GetTensorData<int8_t>(output));
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->output_multiplier, data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
return kTfLiteOk;
|
||||
case kTfLiteInt32:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int16_t>(input), size,
|
||||
data->output_multiplier, data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
return kTfLiteOk;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
// Int8 to Int8 requantization, required if the input and output tensors
|
||||
// have different scales and/or zero points.
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
|
||||
size, data->output_multiplier,
|
||||
data->output_shift, data->input_zero_point,
|
||||
data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
@@ -107,23 +181,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
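// Float-equivalent of the per-element requantization performed by the
// Requantize calls above (sketch only; the kernel works in fixed point via
// the prepared multiplier and shift). The helper name is hypothetical.
#include <algorithm>
#include <cmath>
#include <cstdint>

inline int8_t RequantizeOne(int16_t in, float input_scale, int32_t input_zp,
                            float output_scale, int32_t output_zp) {
  const float real_value = (in - input_zp) * input_scale;  // dequantize
  const int32_t q =
      static_cast<int32_t>(std::lround(real_value / output_scale)) + output_zp;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));  // clamp to int8
}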
|
||||
|
||||
} // namespace quantize
|
||||
} // namespace
|
||||
|
||||
// This Op (QUANTIZE) quantizes the input and produces quantized output.
|
||||
// AffineQuantize takes scale and zero point and quantizes the float value to
|
||||
// quantized output, in int8 or uint8 format.
|
||||
TfLiteRegistration* Register_QUANTIZE() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/quantize::Prepare,
|
||||
/*invoke=*/quantize::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_QUANTIZE() {
|
||||
return {/*init=*/Init,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/Prepare,
|
||||
/*invoke=*/Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -18,9 +18,12 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -30,10 +33,27 @@ namespace reduce {
|
||||
constexpr int kMaxNumberOfAxis = 4;
|
||||
constexpr int kMaxNumberOfReducedAxis = 2;
|
||||
|
||||
struct OpData {
|
||||
int32_t multiplier;
|
||||
int shift;
|
||||
int temp_buffer_idx;
|
||||
int resolved_axis_idx;
|
||||
int input_zp;
|
||||
float input_scale;
|
||||
int output_zp;
|
||||
float output_scale;
|
||||
int num_output_elements;
|
||||
};
|
||||
|
||||
void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
return context->AllocatePersistentBuffer(context, sizeof(OpData));
|
||||
}
|
||||
|
||||
TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
|
||||
// Inputs Tensor (dtype depends on quantization):
|
||||
// [0] = Input
|
||||
// [1] = Axis
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
|
||||
// Outputs Tensor (dtype depends on quantization):
|
||||
// [0] = Output
|
||||
@@ -44,13 +64,63 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
// Validate axis type
|
||||
const TfLiteTensor* axis = GetInput(context, node, 1);
|
||||
TF_LITE_ENSURE(context, axis != nullptr);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
|
||||
|
||||
if (input->type == kTfLiteInt8) {
|
||||
OpData* data = static_cast<OpData*>(node->user_data);
|
||||
const TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
const double real_multiplier = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &data->multiplier, &data->shift);
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
|
||||
|
||||
OpData* op_data = static_cast<OpData*>(node->user_data);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
const TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
const TfLiteTensor* axis = GetInput(context, node, 1);
|
||||
|
||||
op_data->input_scale = input->params.scale;
|
||||
op_data->output_scale = output->params.scale;
|
||||
op_data->num_output_elements = NumElements(output);
|
||||
|
||||
context->RequestScratchBufferInArena(context, sizeof(int) * input->dims->size,
|
||||
&op_data->temp_buffer_idx);
|
||||
context->RequestScratchBufferInArena(
|
||||
context, sizeof(int) * static_cast<int>(ElementCount(*axis->dims)),
|
||||
&op_data->resolved_axis_idx);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
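// Sketch of the scratch-buffer pattern used by PrepareMax()/EvalMax():
// Prepare() reserves space in the arena and stores the returned index in
// OpData, and Eval() later resolves that index to a pointer. Both calls
// appear in this file; the surrounding names are illustrative only.
//
//   // In Prepare():
//   context->RequestScratchBufferInArena(context, num_bytes,
//                                        &op_data->temp_buffer_idx);
//   // In Eval():
//   int* temp_buffer = static_cast<int*>(
//       context->GetScratchBuffer(context, op_data->temp_buffer_idx));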
|
||||
|
||||
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
|
||||
const TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
if (input->type == kTfLiteInt8) {
|
||||
const double real_multiplier = static_cast<double>(input->params.scale) /
|
||||
static_cast<double>(output->params.scale);
|
||||
QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
|
||||
}
|
||||
|
||||
int output_size = NumElements(output);
|
||||
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
|
||||
context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
|
||||
&op_data->temp_buffer_idx);
|
||||
op_data->input_zp = input->params.zero_point;
|
||||
op_data->input_scale = input->params.scale;
|
||||
op_data->output_zp = output->params.zero_point;
|
||||
op_data->output_scale = output->params.scale;
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
|
||||
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
|
||||
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@@ -58,7 +128,7 @@ void ResolveAxis(const int* axis_data, int axis_count,
|
||||
tflite::MeanParams* op_params) {
|
||||
int i = 0;
|
||||
for (; i < axis_count; ++i) {
|
||||
op_params->axis[i] = static_cast<int16>(axis_data[i]);
|
||||
op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
|
||||
}
|
||||
for (; i < 4; ++i) {
|
||||
op_params->axis[i] = 1;
|
||||
@@ -67,69 +137,206 @@ void ResolveAxis(const int* axis_data, int axis_count,
|
||||
}
|
||||
|
||||
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
const TfLiteTensor* axis = GetInput(context, node, 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
TfLiteReducerParams* params =
|
||||
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
|
||||
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
|
||||
|
||||
int num_axis = static_cast<int>(NumElements(axis));
|
||||
int num_axis = static_cast<int>(ElementCount(*axis->dims));
|
||||
int temp_index[kMaxNumberOfAxis];
|
||||
int resolved_axis[kMaxNumberOfReducedAxis];
|
||||
|
||||
tflite::MeanParams op_params;
|
||||
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis, &op_params);
|
||||
|
||||
// Special case mean implementation exists for 4D mean across axes 1 and 2.
|
||||
bool special_case_4d_axes_1_and_2 =
|
||||
input->dims->size == 4 && op_params.axis_count == 2 &&
|
||||
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
|
||||
(op_params.axis[0] == 2 && op_params.axis[1] == 1));
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
tflite::MeanParams op_params;
|
||||
ResolveAxis(GetTensorData<int>(axis), num_axis, &op_params);
|
||||
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
|
||||
// scratch tensor allocation has been implemented in (b/132070898)
|
||||
bool is_valid_inputs =
|
||||
(NumDimensions(input) == 4 && op_params.axis_count == 2 &&
|
||||
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
|
||||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
|
||||
TF_LITE_ENSURE_MSG(
|
||||
context, is_valid_inputs == true,
|
||||
"Number of Input "
|
||||
"dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
|
||||
// TODO(b/139102329): Handle the below special case in the combined
|
||||
// reference method.
|
||||
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
|
||||
if (params->keep_dims) {
|
||||
reference_ops::Mean(op_params, GetTensorShape(input),
|
||||
GetTensorData<float>(input), GetTensorShape(output),
|
||||
GetTensorData<float>(output));
|
||||
if (params->keep_dims && special_case_4d_axes_1_and_2) {
|
||||
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
} else {
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::Mean(GetTensorData<float>(input), input->dims->data,
|
||||
input->dims->size, GetTensorData<float>(output),
|
||||
reference_ops::Mean(
|
||||
tflite::micro::GetTensorData<float>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<float>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_index, resolved_axis,
|
||||
tflite::micro::GetTensorData<float>(output)));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteInt8: {
|
||||
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
|
||||
if (params->keep_dims && special_case_4d_axes_1_and_2) {
|
||||
reference_integer_ops::Mean(
|
||||
op_params, op_data->multiplier, op_data->shift,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output), op_data->output_zp);
|
||||
} else if (op_data->input_zp == op_data->output_zp &&
|
||||
op_data->input_scale == op_data->output_scale) {
|
||||
int32_t* temp_buffer = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::Mean(
|
||||
tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_index, resolved_axis, temp_buffer));
|
||||
} else {
|
||||
int32_t* temp_buffer = static_cast<int32_t*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::QuantizedMeanOrSum(
|
||||
tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
|
||||
op_data->input_scale, input->dims->data, input->dims->size,
|
||||
tflite::micro::GetTensorData<int8_t>(output),
|
||||
op_data->output_zp, op_data->output_scale, output->dims->data,
|
||||
output->dims->size, tflite::micro::GetTensorData<int>(axis),
|
||||
num_axis, params->keep_dims, temp_index, resolved_axis,
|
||||
temp_buffer, false));
|
||||
}
|
||||
} break;
|
||||
case kTfLiteUInt8: {
|
||||
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
|
||||
if (params->keep_dims && special_case_4d_axes_1_and_2) {
|
||||
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
op_data->input_zp, op_data->input_scale,
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
op_data->output_zp, op_data->output_scale);
|
||||
} else if (op_data->input_zp == op_data->output_zp &&
|
||||
op_data->input_scale == op_data->output_scale) {
|
||||
uint32_t* temp_buffer = static_cast<uint32_t*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::Mean(tflite::micro::GetTensorData<uint8_t>(input),
|
||||
input->dims->data, input->dims->size,
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_index, resolved_axis,
|
||||
GetTensorData<float>(output)));
|
||||
tflite::micro::GetTensorData<int>(axis),
|
||||
num_axis, params->keep_dims, temp_index,
|
||||
resolved_axis, temp_buffer));
|
||||
} else {
|
||||
uint32_t* temp_buffer = static_cast<uint32_t*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::QuantizedMeanOrSum(
|
||||
tflite::micro::GetTensorData<uint8_t>(input), op_data->input_zp,
|
||||
op_data->input_scale, input->dims->data, input->dims->size,
|
||||
tflite::micro::GetTensorData<uint8_t>(output),
|
||||
op_data->output_zp, op_data->output_scale, output->dims->data,
|
||||
output->dims->size, tflite::micro::GetTensorData<int>(axis),
|
||||
num_axis, params->keep_dims, temp_index, resolved_axis,
|
||||
temp_buffer, false));
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
// TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018)
|
||||
TF_LITE_ENSURE_MSG(context, false,
|
||||
"Currently, only float32 input type "
|
||||
"Currently, only float32, int8 or uint8 input type "
|
||||
"is supported.");
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TfLiteReducerParams* params =
|
||||
static_cast<TfLiteReducerParams*>(node->builtin_data);
|
||||
OpData* op_data = static_cast<OpData*>(node->user_data);
|
||||
|
||||
// Interpret an axis tensor with null dimensions as a scalar
|
||||
int num_axis = static_cast<int>(ElementCount(*axis->dims));
|
||||
int* temp_buffer = static_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
|
||||
int* resolved_axis = static_cast<int*>(
|
||||
context->GetScratchBuffer(context, op_data->resolved_axis_idx));
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32:
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::ReduceGeneric<float>(
|
||||
tflite::micro::GetTensorData<float>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<float>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_buffer, resolved_axis,
|
||||
std::numeric_limits<float>::lowest(),
|
||||
[](const float current, const float in) -> float {
|
||||
return (in > current) ? in : current;
|
||||
}));
|
||||
break;
|
||||
case kTfLiteInt8:
|
||||
TF_LITE_ENSURE_EQ(context, static_cast<double>(op_data->input_scale),
|
||||
static_cast<double>(op_data->output_scale));
|
||||
TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp);
|
||||
TF_LITE_ENSURE(
|
||||
context,
|
||||
reference_ops::ReduceGeneric<int8_t>(
|
||||
tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
|
||||
input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
|
||||
output->dims->data, output->dims->size,
|
||||
tflite::micro::GetTensorData<int>(axis), num_axis,
|
||||
params->keep_dims, temp_buffer, resolved_axis,
|
||||
std::numeric_limits<int8_t>::lowest(),
|
||||
[](const int8_t current, const int8_t in) -> int8_t {
|
||||
return (in > current) ? in : current;
|
||||
}));
|
||||
break;
|
||||
default:
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Only float32 and int8 types are supported.\n");
|
||||
return kTfLiteError;
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace reduce
|
||||
|
||||
TfLiteRegistration* Register_MEAN() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reduce::PrepareMeanOrSum,
|
||||
/*invoke=*/reduce::EvalMean,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_MEAN() {
|
||||
return {/*init=*/reduce::InitReduce,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reduce::PrepareMeanOrSum,
|
||||
/*invoke=*/reduce::EvalMean,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
TfLiteRegistration Register_REDUCE_MAX() {
|
||||
return {/*init=*/reduce::InitReduce,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reduce::PrepareMax,
|
||||
/*invoke=*/reduce::EvalMax,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
@@ -18,6 +18,9 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
#include "tensorflow/lite/micro/micro_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -29,7 +32,9 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
// Tensorflow's Reshape allows one of the shape components to have the
|
||||
// special -1 value, meaning it will be calculated automatically based on the
|
||||
// input. Here we calculate what that dimension should be so that the number
|
||||
@@ -61,7 +66,7 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
|
||||
num_output_elements *= output_shape->data[stretch_dim];
|
||||
}
|
||||
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
|
||||
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
|
||||
return kTfLiteOk;
|
||||
}
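// Sketch of the "-1 means infer this dimension" rule that ReshapeOutput
// implements above; the helper below is illustrative, not part of the kernel.
#include <cstdint>

// Returns the resolved size of the stretch dimension, or -1 on mismatch.
// Example: 6 input elements with requested shape {2, -1} resolves to 3.
inline int ResolveStretchDim(const int32_t* requested_shape, int num_dims,
                             int num_input_elements) {
  int known_product = 1;
  int stretch_index = -1;
  for (int i = 0; i < num_dims; ++i) {
    if (requested_shape[i] == -1) {
      stretch_index = i;
    } else {
      known_product *= requested_shape[i];
    }
  }
  if (stretch_index == -1) return -1;  // no -1 entry requested
  if (known_product == 0 || num_input_elements % known_product != 0) return -1;
  return num_input_elements / known_product;
}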
|
||||
@@ -74,13 +79,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
// TODO(b/162522304): storing input bytes in OpData increases some models
|
||||
// significantly, possibly due to alignment issues.
|
||||
size_t input_bytes;
|
||||
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
|
||||
input_bytes *= ElementCount(*input->dims);
|
||||
|
||||
// Do nothing for in-place reshape.
|
||||
if (input->data.raw != output->data.raw) {
|
||||
// Otherwise perform reshape with copy.
|
||||
for (size_t i = 0; i < input->bytes; ++i) {
|
||||
for (size_t i = 0; i < input_bytes; ++i) {
|
||||
output->data.raw[i] = input->data.raw[i];
|
||||
}
|
||||
}
|
||||
@@ -89,16 +102,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
} // namespace reshape
|
||||
|
||||
TfLiteRegistration* Register_RESHAPE() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reshape::Prepare,
|
||||
/*invoke=*/reshape::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RESHAPE() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/reshape::Prepare,
|
||||
/*invoke=*/reshape::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -31,7 +32,6 @@ constexpr int kSizeTensor = 1;
|
||||
constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(DEBUG)
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
|
||||
@@ -49,11 +49,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
output->type = input->type;
|
||||
|
||||
if (!IsConstantTensor(size)) {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Dynamic tensors are unsupported in tfmicro.");
|
||||
TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
|
||||
return kTfLiteError;
|
||||
}
|
||||
#endif
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
@@ -61,9 +59,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params =
|
||||
reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
const TfLiteEvalTensor* size =
|
||||
tflite::micro::GetEvalInput(context, node, kSizeTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
tflite::ResizeNearestNeighborParams op_params;
|
||||
op_params.align_corners = params->align_corners;
|
||||
@@ -71,22 +72,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
if (output->type == kTfLiteFloat32) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, GetTensorShape(input), GetTensorData<int32>(input),
|
||||
GetTensorShape(size), GetTensorData<int32>(size),
|
||||
GetTensorShape(output), GetTensorData<int32>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int32_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(size), GetTensorData<int32>(size),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (output->type == kTfLiteInt8) {
|
||||
reference_ops::ResizeNearestNeighbor(
|
||||
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(size), GetTensorData<int32>(size),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
op_params, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(size),
|
||||
tflite::micro::GetTensorData<int32_t>(size),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context,
|
||||
"Output type is %d, requires float, uint8 or int8.",
|
||||
"Output type is %d, requires float, uint8_t or int8_t.",
|
||||
output->type);
|
||||
return kTfLiteError;
|
||||
}
|
||||
@@ -95,16 +105,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
} // namespace resize_nearest_neighbor
|
||||
|
||||
TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/resize_nearest_neighbor::Prepare,
|
||||
/*invoke=*/resize_nearest_neighbor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/resize_nearest_neighbor::Prepare,
|
||||
/*invoke=*/resize_nearest_neighbor::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
@@ -18,6 +18,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
@@ -29,11 +30,13 @@ constexpr int kOutputTensor = 0;
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
|
||||
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
|
||||
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
|
||||
for (int i = 0; i < output->dims->size; ++i) {
|
||||
@@ -43,26 +46,29 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
|
||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||
const TfLiteEvalTensor* input =
|
||||
tflite::micro::GetEvalInput(context, node, kInputTensor);
|
||||
TfLiteEvalTensor* output =
|
||||
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
|
||||
|
||||
reference_ops::Round(GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
reference_ops::Round(tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
} // namespace round
|
||||
|
||||
TfLiteRegistration* Register_ROUND() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/round::Prepare,
|
||||
/*invoke=*/round::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_ROUND() {
|
||||
return {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/round::Prepare,
|
||||
/*invoke=*/round::Eval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
|
||||
73
code/lib/tfmicro/tensorflow/lite/micro/kernels/shape.cc
Normal file
@@ -0,0 +1,73 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

void ExtractShape(const TfLiteEvalTensor* input, int32_t* output_data) {
  for (int i = 0; i < input->dims->size; ++i) {
    output_data[i] = input->dims->data[i];
  }
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  if (output->type != kTfLiteInt32) {
    TF_LITE_KERNEL_LOG(context, "Output type %s (%d) not supported.",
                       TfLiteTypeGetName(output->type), output->type);
    return kTfLiteError;
  } else {
    ExtractShape(input, tflite::micro::GetTensorData<int32_t>(output));
  }

  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_SHAPE() {
  return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/Prepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}

}  // namespace tflite
|
||||
@@ -22,29 +22,35 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace activations {
|
||||
namespace {
|
||||
|
||||
// Softmax parameter data that persists in user_data
|
||||
static constexpr int kInt16LUTArraySize = 513;
|
||||
|
||||
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
const TfLiteTensor* input,
|
||||
TfLiteTensor* output,
|
||||
const TfLiteSoftmaxParams* params,
|
||||
SoftmaxParams* op_data) {
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
|
||||
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 ||
|
||||
input->type == kTfLiteInt16) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
} else {
|
||||
} else if (input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
|
||||
(0.001f * 1.f / 32768));
|
||||
} else { // input->type == kTfLiteInt8
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
|
||||
// NOTE: Current int16 softmax output does not require symmetric scaling
|
||||
// - so no need to verify scale here.
|
||||
} else {
|
||||
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
|
||||
(0.001f * 1.f / 65536));
|
||||
} else {  // output->type == kTfLiteInt8
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
|
||||
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
|
||||
@@ -53,15 +59,28 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
|
||||
static const int kScaledDiffIntegerBits = 5;
|
||||
|
||||
int input_left_shift;
|
||||
tflite::PreprocessSoftmaxScaling(
|
||||
static_cast<double>(params->beta),
|
||||
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
|
||||
&op_data->input_multiplier, &input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
op_data->diff_min =
|
||||
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
|
||||
op_data->input_left_shift);
|
||||
// Calculate input_multiplier and input_left_shift
|
||||
if (input->type == kTfLiteInt16) {
|
||||
int input_left_shift;
|
||||
double input_scale_beta_rescale =
|
||||
static_cast<double>(input->params.scale) *
|
||||
static_cast<double>(params->beta) /
|
||||
(10.0 / 65535.0); // scale the input_diff such that [-65535, 0]
|
||||
// correspond to [-10.0, 0.0]
|
||||
QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
|
||||
&input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
} else {
|
||||
int input_left_shift;
|
||||
tflite::PreprocessSoftmaxScaling(
|
||||
static_cast<double>(params->beta),
|
||||
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
|
||||
&op_data->input_multiplier, &input_left_shift);
|
||||
op_data->input_left_shift = input_left_shift;
|
||||
op_data->diff_min =
|
||||
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
|
||||
op_data->input_left_shift);
|
||||
}
|
||||
} else {
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
|
||||
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
|
||||
@@ -70,53 +89,106 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<float>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<uint8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<uint8_t>(output));
|
||||
} else if (input->type == kTfLiteInt8) {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int8_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
}
|
||||
} else {
|
||||
tflite::reference_ops::SoftmaxInt16(
|
||||
op_data, tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int16_t>(output));
|
||||
}
|
||||
}
|
||||
|
||||
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
|
||||
}
|
||||
|
||||
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
|
||||
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
TF_LITE_ENSURE(context, output != nullptr);
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
// Takes a tensor and performs softmax along the last dimension.
|
||||
void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<float>(input),
|
||||
GetTensorShape(output), GetTensorData<float>(output));
|
||||
}
|
||||
|
||||
void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
|
||||
const SoftmaxParams& op_data) {
|
||||
if (input->type == kTfLiteUInt8) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<uint8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<uint8_t>(output));
|
||||
} else {
|
||||
if (output->type == kTfLiteInt16) {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int16_t>(output));
|
||||
} else {
|
||||
tflite::reference_ops::Softmax(
|
||||
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
|
||||
GetTensorShape(output), GetTensorData<int8_t>(output));
|
||||
}
|
||||
TF_LITE_ENSURE(context, node->user_data != nullptr);
|
||||
SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
|
||||
// Only allocate LUTs for kTfLiteInt16 data type
|
||||
if (input->type == kTfLiteInt16) {
|
||||
void* raw_exp_lut = context->AllocatePersistentBuffer(
|
||||
context, sizeof(int16_t) * kInt16LUTArraySize);
|
||||
TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
|
||||
op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
|
||||
void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
|
||||
context, sizeof(int16_t) * kInt16LUTArraySize);
|
||||
TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
|
||||
op_data->one_over_one_plus_x_lut =
|
||||
reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
|
||||
}
|
||||
|
||||
if (output->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
|
||||
input->type == kTfLiteUInt8 ||
|
||||
input->type == kTfLiteInt16);
|
||||
} else {
|
||||
TF_LITE_ENSURE_EQ(context, input->type, output->type);
|
||||
}
|
||||
|
||||
// Populate LUT if required
|
||||
if (input->type == kTfLiteInt16) {
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
// exp LUT only used on negative values
|
||||
// we consider exp(-10.0) is insignificant to accumulation
|
||||
gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
|
||||
op_data->exp_lut, kInt16LUTArraySize);
|
||||
gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
|
||||
op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
|
||||
op_data->zero_point = output->params.zero_point;
|
||||
op_data->scale = output->params.scale;
|
||||
}
|
||||
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
return CalculateSoftmaxParams(context, input, output, params, op_data);
|
||||
}
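// Sketch of how a lookup table such as exp_lut can be filled by uniformly
// sampling a function over [input_min, input_max]; FillInt16Lut is a
// hypothetical helper illustrating the idea, not tflite's gen_lut().
#include <algorithm>
#include <cmath>
#include <cstdint>

template <typename F>
void FillInt16Lut(F func, float input_min, float input_max, int16_t* lut,
                  int lut_size) {
  const float step = (input_max - input_min) / static_cast<float>(lut_size - 1);
  for (int i = 0; i < lut_size; ++i) {
    const float x = input_min + step * static_cast<float>(i);
    // Map func(x), assumed to lie in [0, 1], onto the positive int16_t range.
    const float y = std::min(1.0f, std::max(0.0f, func(x))) * 32767.0f;
    lut[i] = static_cast<int16_t>(std::lround(y));
  }
}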
|
||||
|
||||
TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
const TfLiteTensor* input = GetInput(context, node, 0);
|
||||
TfLiteTensor* output = GetOutput(context, node, 0);
|
||||
|
||||
SoftmaxParams op_data;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
CalculateSoftmaxParams(context, input, output, params, &op_data));
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
|
||||
|
||||
switch (input->type) {
|
||||
case kTfLiteFloat32: {
|
||||
@@ -124,7 +196,8 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteOk;
|
||||
}
|
||||
case kTfLiteInt8:
|
||||
case kTfLiteUInt8: {
|
||||
case kTfLiteUInt8:
|
||||
case kTfLiteInt16: {
|
||||
SoftmaxQuantized(input, output, op_data);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
@@ -134,20 +207,17 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
} // namespace activations
|
||||
} // namespace
|
||||
|
||||
TfLiteRegistration* Register_SOFTMAX() {
|
||||
static TfLiteRegistration r = {/*init=*/nullptr,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/activations::SoftmaxPrepare,
|
||||
/*invoke=*/activations::SoftmaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
return &r;
|
||||
TfLiteRegistration Register_SOFTMAX() {
|
||||
return {/*init=*/SoftmaxInit,
|
||||
/*free=*/nullptr,
|
||||
/*prepare=*/SoftmaxPrepare,
|
||||
/*invoke=*/SoftmaxEval,
|
||||
/*profiling_string=*/nullptr,
|
||||
/*builtin_code=*/0,
|
||||
/*custom_name=*/nullptr,
|
||||
/*version=*/0};
|
||||
}
|
||||
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.