rolling 20210708

This commit is contained in:
jomjol
2021-08-07 15:25:27 +02:00
parent 6f06af1d5f
commit 32f15fc557
138 changed files with 8048 additions and 2292 deletions

View File

@@ -45,7 +45,12 @@ In other cases you can contact the developer via email: <img src="https://raw.gi
**General remark:** Besides the `firmware.bin`, the content of `/html` typically also needs to be updated!
##### Rolling (2021-08-07)
* GPIO: use the general MQTT main topic for the GPIO topics
* Update tfmicro to new master (2021-08-07)
* Bug fix: MQTT value
* Based on v2021-08-01
##### 8.0.5 - Multi Meter Support (2021-08-01)

View File

@@ -8,6 +8,8 @@
#include "esp_system.h"
#include "esp_event.h"
#include "server_tflite.h"
//#define LOG_LOCAL_LEVEL ESP_LOG_DEBUG
#include "esp_log.h"
//#include "errno.h"
@@ -303,7 +305,14 @@ bool GpioHandler::readConfig()
if (!_isEnabled)
return false;
std::string mainTopicMQTT = "";
// std::string mainTopicMQTT = "";
std::string mainTopicMQTT = GetMQTTMainTopic();
if (mainTopicMQTT.length() > 0)
{
mainTopicMQTT = mainTopicMQTT + "/GPIO";
ESP_LOGD(TAG_SERVERGPIO, "MAINTOPICMQTT found\r\n");
}
bool registerISR = false;
while (configFile.getNextLine(&line, disabledLine, eof) && !configFile.isNewParagraph(line))
{
@@ -315,8 +324,8 @@ bool GpioHandler::readConfig()
// std::string gpioStr = pieces_match[1];
ESP_LOGD(TAG_SERVERGPIO, "conf param %s\r\n", toUpper(zerlegt[0]).c_str());
if (toUpper(zerlegt[0]) == "MAINTOPICMQTT") {
ESP_LOGD(TAG_SERVERGPIO, "MAINTOPICMQTT found\r\n");
mainTopicMQTT = zerlegt[1];
// ESP_LOGD(TAG_SERVERGPIO, "MAINTOPICMQTT found\r\n");
// mainTopicMQTT = zerlegt[1];
} else if ((zerlegt[0].rfind("IO", 0) == 0) && (zerlegt.size() >= 6))
{
ESP_LOGI(TAG_SERVERGPIO,"Enable GP%s in %s mode", zerlegt[0].c_str(), zerlegt[1].c_str());
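The net effect of this hunk: instead of reading its own `MAINTOPICMQTT` parameter, the GPIO handler now derives its topic prefix from the general MQTT main topic. A minimal sketch of the resulting composition; `BuildGpioTopicPrefix` and the example topic value are illustrative only, while `GetMQTTMainTopic` is the real accessor introduced further below:

```cpp
#include <string>

// Hypothetical stand-in for the real GetMQTTMainTopic() wired up further below.
static std::string GetMQTTMainTopic() { return "watermeter"; }

// Sketch of the new prefix logic in GpioHandler::readConfig():
// append "/GPIO" to the general MQTT main topic when one is configured.
static std::string BuildGpioTopicPrefix() {
    std::string mainTopicMQTT = GetMQTTMainTopic();
    if (mainTopicMQTT.length() > 0)
        mainTopicMQTT = mainTopicMQTT + "/GPIO";   // e.g. "watermeter/GPIO"
    return mainTopicMQTT;
}
```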

View File

@@ -72,6 +72,19 @@ std::vector<HTMLInfo*> ClassFlowControll::GetAllAnalog()
}
string ClassFlowControll::GetMQTTMainTopic()
{
for (int i = 0; i < FlowControll.size(); ++i)
if (FlowControll[i]->name().compare("ClassFlowMQTT") == 0)
return ((ClassFlowMQTT*) (FlowControll[i]))->GetMQTTMainTopic();
return "";
}
void ClassFlowControll::SetInitialParameter(void)
{
AutoStart = false;
@@ -445,6 +458,7 @@ bool ClassFlowControll::ReadParameter(FILE* pfile, string& aktparamgraph)
return true;
}
int ClassFlowControll::CleanTempFolder() {
const char* folderPath = "/sdcard/img_tmp";

View File

@@ -49,6 +49,8 @@ public:
string GetPrevalue(std::string _number = "");
bool ReadParameter(FILE* pfile, string& aktparamgraph);
string GetMQTTMainTopic();
esp_err_t GetJPGStream(std::string _fn, httpd_req_t *req);
esp_err_t SendRawJPG(httpd_req_t *req);

View File

@@ -122,6 +122,12 @@ bool ClassFlowMQTT::ReadParameter(FILE* pfile, string& aktparamgraph)
}
string ClassFlowMQTT::GetMQTTMainTopic()
{
return maintopic;
}
bool ClassFlowMQTT::doFlow(string zwtime)
{
if (!MQTTenable)
@@ -152,7 +158,7 @@ bool ClassFlowMQTT::doFlow(string zwtime)
for (int i = 0; i < NUMBERS.size(); ++i)
{
result = NUMBERS[i]->ReturnValue;
result = NUMBERS[i]->ReturnValueNoError;
resulterror = NUMBERS[i]->ErrorMessageText;
resultrate = std::to_string(NUMBERS[i]->FlowRateAct);
resulttimestamp = NUMBERS[i]->timeStamp;

View File

@@ -23,6 +23,8 @@ public:
ClassFlowMQTT(std::vector<ClassFlow*>* lfc);
ClassFlowMQTT(std::vector<ClassFlow*>* lfc, ClassFlow *_prev);
string GetMQTTMainTopic();
bool ReadParameter(FILE* pfile, string& aktparamgraph);
bool doFlow(string time);
string name(){return "ClassFlowMQTT";};

View File

@@ -655,6 +655,11 @@ void TFliteDoAutoStart()
xTaskCreate(&task_autodoFlow, "task_autodoFlow", configMINIMAL_STACK_SIZE * 64, NULL, tskIDLE_PRIORITY+1, &xHandletask_autodoFlow);
}
std::string GetMQTTMainTopic()
{
return tfliteflow.GetMQTTMainTopic();
}
void register_server_tflite_uri(httpd_handle_t server)
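Taken together, the new accessors form a simple delegation chain: the free function in server_tflite forwards to ClassFlowControll, which locates the ClassFlowMQTT flow by name and returns its main topic. A condensed, self-contained sketch of that chain follows; the class layout is heavily simplified, and only the names that appear in the diffs above are real:

```cpp
#include <string>
#include <vector>

// Condensed sketch of the GetMQTTMainTopic() delegation chain.
class ClassFlow {
public:
    virtual std::string name() { return "ClassFlow"; }
    virtual ~ClassFlow() = default;
};

class ClassFlowMQTT : public ClassFlow {
public:
    std::string maintopic = "watermeter";            // set via ReadParameter()
    std::string name() override { return "ClassFlowMQTT"; }
    std::string GetMQTTMainTopic() { return maintopic; }
};

class ClassFlowControll {
public:
    std::vector<ClassFlow*> FlowControll;
    // Find the MQTT flow by name and forward the request to it.
    std::string GetMQTTMainTopic() {
        for (size_t i = 0; i < FlowControll.size(); ++i)
            if (FlowControll[i]->name() == "ClassFlowMQTT")
                return ((ClassFlowMQTT*)FlowControll[i])->GetMQTTMainTopic();
        return "";
    }
};

static ClassFlowControll tfliteflow;

// Free function exposed by server_tflite; this is what GpioHandler calls.
std::string GetMQTTMainTopic() { return tfliteflow.GetMQTTMainTopic(); }
```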

View File

@@ -1,4 +1,5 @@
#include <esp_log.h>
#include <string>
#include <esp_http_server.h>
#include "CImageBasis.h"
@@ -13,6 +14,8 @@ void TFliteDoAutoStart();
bool isSetupModusActive();
std::string GetMQTTMainTopic();
esp_err_t GetJPG(std::string _filename, httpd_req_t *req);
esp_err_t GetRawJPG(httpd_req_t *req);

BIN
code/components/tfmicro.zip Normal file

Binary file not shown.

View File

@@ -23,7 +23,7 @@ if(NOT DEFINED ENV{IDF_PATH})
endif()
idf_component_register(
SRCS tensorflow/lite/micro/simple_memory_allocator.cc tensorflow/lite/micro/micro_error_reporter.cc tensorflow/lite/micro/memory_helpers.cc tensorflow/lite/micro/test_helpers.cc tensorflow/lite/micro/recording_micro_allocator.cc tensorflow/lite/micro/micro_time.cc tensorflow/lite/micro/recording_simple_memory_allocator.cc tensorflow/lite/micro/micro_string.cc tensorflow/lite/micro/micro_profiler.cc tensorflow/lite/micro/debug_log.cc tensorflow/lite/micro/all_ops_resolver.cc tensorflow/lite/micro/micro_utils.cc tensorflow/lite/micro/micro_interpreter.cc tensorflow/lite/micro/micro_allocator.cc tensorflow/lite/micro/system_setup.cc tensorflow/lite/micro/memory_planner/linear_memory_planner.cc tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc tensorflow/lite/c/common.c tensorflow/lite/core/api/error_reporter.cc tensorflow/lite/core/api/flatbuffer_conversions.cc tensorflow/lite/core/api/op_resolver.cc tensorflow/lite/core/api/tensor_utils.cc tensorflow/lite/kernels/internal/quantization_util.cc tensorflow/lite/kernels/kernel_util.cc tensorflow/lite/schema/schema_utils.cc tensorflow/lite/micro/kernels/activations.cc tensorflow/lite/micro/kernels/add.cc tensorflow/lite/micro/kernels/add_n.cc tensorflow/lite/micro/kernels/arg_min_max.cc tensorflow/lite/micro/kernels/batch_to_space_nd.cc tensorflow/lite/micro/kernels/cast.cc tensorflow/lite/micro/kernels/ceil.cc tensorflow/lite/micro/kernels/circular_buffer.cc tensorflow/lite/micro/kernels/comparisons.cc tensorflow/lite/micro/kernels/concatenation.cc tensorflow/lite/micro/kernels/conv.cc tensorflow/lite/micro/kernels/conv_common.cc tensorflow/lite/micro/kernels/depthwise_conv.cc tensorflow/lite/micro/kernels/depthwise_conv_common.cc tensorflow/lite/micro/kernels/dequantize.cc tensorflow/lite/micro/kernels/detection_postprocess.cc tensorflow/lite/micro/kernels/div.cc tensorflow/lite/micro/kernels/elementwise.cc tensorflow/lite/micro/kernels/elu.cc tensorflow/lite/micro/kernels/ethosu.cc tensorflow/lite/micro/kernels/exp.cc tensorflow/lite/micro/kernels/expand_dims.cc tensorflow/lite/micro/kernels/fill.cc tensorflow/lite/micro/kernels/floor.cc tensorflow/lite/micro/kernels/fully_connected.cc tensorflow/lite/micro/kernels/fully_connected_common.cc tensorflow/lite/micro/kernels/hard_swish.cc tensorflow/lite/micro/kernels/kernel_runner.cc tensorflow/lite/micro/kernels/kernel_util.cc tensorflow/lite/micro/kernels/l2norm.cc tensorflow/lite/micro/kernels/l2_pool_2d.cc tensorflow/lite/micro/kernels/leaky_relu.cc tensorflow/lite/micro/kernels/logical.cc tensorflow/lite/micro/kernels/logistic.cc tensorflow/lite/micro/kernels/maximum_minimum.cc tensorflow/lite/micro/kernels/mul.cc tensorflow/lite/micro/kernels/neg.cc tensorflow/lite/micro/kernels/pack.cc tensorflow/lite/micro/kernels/pad.cc tensorflow/lite/micro/kernels/pooling.cc tensorflow/lite/micro/kernels/prelu.cc tensorflow/lite/micro/kernels/quantize.cc tensorflow/lite/micro/kernels/quantize_common.cc tensorflow/lite/micro/kernels/reduce.cc tensorflow/lite/micro/kernels/reshape.cc tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc tensorflow/lite/micro/kernels/round.cc tensorflow/lite/micro/kernels/shape.cc tensorflow/lite/micro/kernels/softmax.cc tensorflow/lite/micro/kernels/softmax_common.cc tensorflow/lite/micro/kernels/space_to_batch_nd.cc tensorflow/lite/micro/kernels/split.cc tensorflow/lite/micro/kernels/split_v.cc tensorflow/lite/micro/kernels/squeeze.cc tensorflow/lite/micro/kernels/strided_slice.cc tensorflow/lite/micro/kernels/sub.cc tensorflow/lite/micro/kernels/svdf.cc 
tensorflow/lite/micro/kernels/svdf_common.cc tensorflow/lite/micro/kernels/tanh.cc tensorflow/lite/micro/kernels/transpose_conv.cc tensorflow/lite/micro/kernels/unpack.cc tensorflow/lite/micro/kernels/zeros_like.cc
SRCS tensorflow/lite/micro/simple_memory_allocator.cc tensorflow/lite/micro/debug_log.cc tensorflow/lite/micro/micro_error_reporter.cc tensorflow/lite/micro/memory_helpers.cc tensorflow/lite/micro/test_helpers.cc tensorflow/lite/micro/recording_micro_allocator.cc tensorflow/lite/micro/micro_time.cc tensorflow/lite/micro/recording_simple_memory_allocator.cc tensorflow/lite/micro/micro_string.cc tensorflow/lite/micro/micro_profiler.cc tensorflow/lite/micro/flatbuffer_utils.cc tensorflow/lite/micro/micro_graph.cc tensorflow/lite/micro/mock_micro_graph.cc tensorflow/lite/micro/all_ops_resolver.cc tensorflow/lite/micro/micro_utils.cc tensorflow/lite/micro/micro_interpreter.cc tensorflow/lite/micro/micro_allocator.cc tensorflow/lite/micro/system_setup.cc tensorflow/lite/micro/memory_planner/linear_memory_planner.cc tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc tensorflow/lite/schema/schema_utils.cc tensorflow/lite/c/common.c tensorflow/lite/core/api/tensor_utils.cc tensorflow/lite/core/api/error_reporter.cc tensorflow/lite/core/api/flatbuffer_conversions.cc tensorflow/lite/core/api/op_resolver.cc tensorflow/lite/kernels/kernel_util.cc tensorflow/lite/kernels/internal/quantization_util.cc tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc tensorflow/lite/micro/kernels/activations.cc tensorflow/lite/micro/kernels/activations_common.cc tensorflow/lite/micro/kernels/add.cc tensorflow/lite/micro/kernels/add_n.cc tensorflow/lite/micro/kernels/arg_min_max.cc tensorflow/lite/micro/kernels/batch_to_space_nd.cc tensorflow/lite/micro/kernels/cast.cc tensorflow/lite/micro/kernels/ceil.cc tensorflow/lite/micro/kernels/circular_buffer.cc tensorflow/lite/micro/kernels/comparisons.cc tensorflow/lite/micro/kernels/concatenation.cc tensorflow/lite/micro/kernels/conv.cc tensorflow/lite/micro/kernels/conv_common.cc tensorflow/lite/micro/kernels/cumsum.cc tensorflow/lite/micro/kernels/depth_to_space.cc tensorflow/lite/micro/kernels/depthwise_conv.cc tensorflow/lite/micro/kernels/depthwise_conv_common.cc tensorflow/lite/micro/kernels/dequantize.cc tensorflow/lite/micro/kernels/detection_postprocess.cc tensorflow/lite/micro/kernels/elementwise.cc tensorflow/lite/micro/kernels/elu.cc tensorflow/lite/micro/kernels/ethosu.cc tensorflow/lite/micro/kernels/exp.cc tensorflow/lite/micro/kernels/expand_dims.cc tensorflow/lite/micro/kernels/fill.cc tensorflow/lite/micro/kernels/floor.cc tensorflow/lite/micro/kernels/floor_div.cc tensorflow/lite/micro/kernels/floor_mod.cc tensorflow/lite/micro/kernels/fully_connected.cc tensorflow/lite/micro/kernels/fully_connected_common.cc tensorflow/lite/micro/kernels/gather.cc tensorflow/lite/micro/kernels/gather_nd.cc tensorflow/lite/micro/kernels/hard_swish.cc tensorflow/lite/micro/kernels/hard_swish_common.cc tensorflow/lite/micro/kernels/if.cc tensorflow/lite/micro/kernels/kernel_runner.cc tensorflow/lite/micro/kernels/kernel_util.cc tensorflow/lite/micro/kernels/l2norm.cc tensorflow/lite/micro/kernels/l2_pool_2d.cc tensorflow/lite/micro/kernels/leaky_relu.cc tensorflow/lite/micro/kernels/logical.cc tensorflow/lite/micro/kernels/logical_common.cc tensorflow/lite/micro/kernels/logistic.cc tensorflow/lite/micro/kernels/logistic_common.cc tensorflow/lite/micro/kernels/log_softmax.cc tensorflow/lite/micro/kernels/maximum_minimum.cc tensorflow/lite/micro/kernels/mul.cc tensorflow/lite/micro/kernels/neg.cc tensorflow/lite/micro/kernels/pack.cc tensorflow/lite/micro/kernels/pad.cc tensorflow/lite/micro/kernels/pooling.cc 
tensorflow/lite/micro/kernels/pooling_common.cc tensorflow/lite/micro/kernels/prelu.cc tensorflow/lite/micro/kernels/quantize.cc tensorflow/lite/micro/kernels/quantize_common.cc tensorflow/lite/micro/kernels/reduce.cc tensorflow/lite/micro/kernels/reshape.cc tensorflow/lite/micro/kernels/resize_bilinear.cc tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc tensorflow/lite/micro/kernels/round.cc tensorflow/lite/micro/kernels/shape.cc tensorflow/lite/micro/kernels/softmax.cc tensorflow/lite/micro/kernels/softmax_common.cc tensorflow/lite/micro/kernels/space_to_batch_nd.cc tensorflow/lite/micro/kernels/space_to_depth.cc tensorflow/lite/micro/kernels/split.cc tensorflow/lite/micro/kernels/split_v.cc tensorflow/lite/micro/kernels/squeeze.cc tensorflow/lite/micro/kernels/strided_slice.cc tensorflow/lite/micro/kernels/sub.cc tensorflow/lite/micro/kernels/svdf.cc tensorflow/lite/micro/kernels/svdf_common.cc tensorflow/lite/micro/kernels/tanh.cc tensorflow/lite/micro/kernels/transpose.cc tensorflow/lite/micro/kernels/transpose_conv.cc tensorflow/lite/micro/kernels/unpack.cc tensorflow/lite/micro/kernels/zeros_like.cc
INCLUDE_DIRS . third_party/gemmlowp third_party/flatbuffers/include third_party/ruy)
# Reduce the level of paranoia to be able to compile TF sources
@@ -32,7 +32,7 @@ target_compile_options(${COMPONENT_LIB} PRIVATE
-Wno-missing-field-initializers
-Wno-type-limits)
target_compile_options(${COMPONENT_LIB} PRIVATE -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Werror -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP)
target_compile_options(${COMPONENT_LIB} PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Werror -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP >)
target_compile_options(${COMPONENT_LIB} PRIVATE -Wimplicit-function-declaration -Werror -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP)
target_compile_options(${COMPONENT_LIB} PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -Werror -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP >)
target_compile_options(${COMPONENT_LIB} INTERFACE $<$<IN_LIST:-DTF_LITE_STATIC_MEMORY,$<TARGET_PROPERTY:${COMPONENT_LIB},COMPILE_OPTIONS>>:-DTF_LITE_STATIC_MEMORY>)
target_link_libraries(${COMPONENT_LIB} PRIVATE -lm)

View File

@@ -63,7 +63,6 @@ typedef struct {
} TfLiteMirrorPaddingParams;
// Possible fused activation functions.
// TODO(aselle): rename to TfLiteActivation
typedef enum {
kTfLiteActNone = 0,
kTfLiteActRelu,
@@ -98,6 +97,8 @@ typedef struct {
TfLiteFusedActivation activation;
} TfLiteConv3DParams;
typedef TfLiteConv3DParams TfLiteConv3DTransposeParams;
typedef struct {
TfLitePadding padding;
int stride_width;
@@ -328,8 +329,9 @@ typedef struct {
} TfLitePadV2Params;
typedef struct {
// TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
// For now we will fix the maximum possible number of dimensions.
// These fields are only used in old models for backward compatibility.
// In the current implementation, we use the 2nd input of the op as the shape,
// and these fields are unused.
int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
int num_dimensions;
} TfLiteReshapeParams;
@@ -495,6 +497,11 @@ typedef struct {
TfLiteType value_dtype;
} TfLiteHashtableParams;
typedef struct {
const char* container;
const char* shared_name;
} TfLiteVarHandleParams;
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

View File

@@ -29,7 +29,9 @@ extern "C" {
// library.
#ifdef SWIG
#define TFL_CAPI_EXPORT
#else
#elif defined(TFL_STATIC_LIBRARY_BUILD)
#define TFL_CAPI_EXPORT
#else // not defined TFL_STATIC_LIBRARY_BUILD
#if defined(_WIN32)
#ifdef TFL_COMPILE_LIBRARY
#define TFL_CAPI_EXPORT __declspec(dllexport)
@@ -54,7 +56,19 @@ typedef enum TfLiteStatus {
// incompatibility between runtime and delegate, e.g., this error is returned
// when trying to apply a TfLite delegate onto a model graph that's already
// immutable.
kTfLiteApplicationError = 3
kTfLiteApplicationError = 3,
// Generally referring to serialized delegate data not being found.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataNotFound = 4,
// Generally referring to data-writing issues in delegate serialization.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataWriteError = 5,
// Generally referring to data-reading issues in delegate serialization.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataReadError = 6,
} TfLiteStatus;
// Types supported by tensor

View File

@@ -45,8 +45,10 @@ int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
#ifndef TF_LITE_STATIC_MEMORY
TfLiteIntArray* TfLiteIntArrayCreate(int size) {
TfLiteIntArray* ret =
(TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size));
int alloc_size = TfLiteIntArrayGetSizeInBytes(size);
if (alloc_size <= 0) return NULL;
TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size);
if (!ret) return ret;
ret->size = size;
return ret;
}
@@ -181,9 +183,9 @@ void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
}
// TODO(b/145340303): Tensor data should be aligned.
if (!tensor->data.raw) {
tensor->data.raw = malloc(num_bytes);
tensor->data.raw = (char*)malloc(num_bytes);
} else if (num_bytes > tensor->bytes) {
tensor->data.raw = realloc(tensor->data.raw, num_bytes);
tensor->data.raw = (char*)realloc(tensor->data.raw, num_bytes);
}
tensor->bytes = num_bytes;
}
@@ -229,7 +231,7 @@ const char* TfLiteTypeGetName(TfLiteType type) {
return "Unknown type";
}
TfLiteDelegate TfLiteDelegateCreate() {
TfLiteDelegate TfLiteDelegateCreate(void) {
TfLiteDelegate d = {
.data_ = NULL,
.Prepare = NULL,
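For reference, the hardened `TfLiteIntArrayCreate` above now returns `NULL` both for a non-positive size and for a failed `malloc`, so callers are expected to check the result. A caller-side sketch; note that this dynamic path is compiled out when `TF_LITE_STATIC_MEMORY` is defined, as it is for this component, so it only applies to builds without that flag:

```cpp
#include <stddef.h>
#include "tensorflow/lite/c/common.h"

// Caller-side sketch: TfLiteIntArrayCreate() may now return NULL either
// because size <= 0 or because malloc failed, so always check the result.
// (Only relevant in builds without TF_LITE_STATIC_MEMORY.)
TfLiteIntArray* MakeUnitDims(int rank) {
  TfLiteIntArray* dims = TfLiteIntArrayCreate(rank);
  if (dims == NULL) return NULL;
  for (int i = 0; i < rank; ++i) dims->data[i] = 1;
  return dims;
}
```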

View File

@@ -456,8 +456,8 @@ typedef struct TfLiteTensor {
} TfLiteTensor;
// A structure representing an instance of a node.
// This structure only exhibits the inputs, outputs and user defined data, not
// other features like the type.
// This structure only exhibits the inputs, outputs, user defined data and some
// node properties (like statefulness), not other features like the type.
typedef struct TfLiteNode {
// Inputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* inputs;
@@ -490,6 +490,9 @@ typedef struct TfLiteNode {
// created by calling `interpreter.ModifyGraphWithDelegate`.
// WARNING: This is an experimental interface that is subject to change.
struct TfLiteDelegate* delegate;
// Whether this op might have side effect (e.g. stateful op).
bool might_have_side_effect;
} TfLiteNode;
#else // defined(TF_LITE_STATIC_MEMORY)?
// NOTE: This flag is opt-in only at compile time.
@@ -640,6 +643,7 @@ typedef struct TfLiteContext {
// TfLiteDelegates can traverse the current execution plan by iterating
// through each member of this array and using GetNodeAndRegistration() to
// access details about a node. i.e.
//
// TfLiteIntArray* execution_plan;
// TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
// for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
@@ -648,6 +652,28 @@ typedef struct TfLiteContext {
// TfLiteRegistration* reg;
// context->GetNodeAndRegistration(context, node_index, &node, &reg);
// }
// Note: the memory pointed by '`*execution_plan` is OWNED by TfLite runtime.
// Future calls to GetExecutionPlan invalidates earlier outputs. The following
// code snippet shows the issue of such an invocation pattern. After calling
// CheckNode, subsequent access to `plan_1st` is undefined.
//
// void CheckNode(const TfLiteNode* node) {
// ...
// TfLiteIntArray* plan_2nd;
// TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan_2nd));
// ...
// }
//
// TfLiteIntArray* plan_1st;
// TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan_1st));
// for (int exec_index = 0; exec_index < plan_1st->size; exec_index++) {
// int node_index = plan_1st->data[exec_index];
// TfLiteNode* node;
// TfLiteRegistration* reg;
// context->GetNodeAndRegistration(context, node_index, &node, &reg);
// CheckNode(node);
// }
//
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
TfLiteIntArray** execution_plan);
@@ -777,6 +803,18 @@ typedef struct TfLiteContext {
// WARNING: This method may not be available on all platforms.
TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
int tensor_idx);
// Retrieves named metadata buffer from the TFLite model.
// Returns kTfLiteOk if metadata is successfully obtained from the flatbuffer
// Model: that is, there exists a `metadata` entry with given `name` string.
// (see TFLite's schema.fbs).
// The corresponding `buffer` information is populated in `ptr` & `bytes`.
// The data from `ptr` is valid for the lifetime of the Interpreter.
//
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*GetModelMetadata)(const struct TfLiteContext* context,
const char* name, const char** ptr,
size_t* bytes);
} TfLiteContext;
typedef struct TfLiteRegistration {
@@ -918,7 +956,7 @@ typedef struct TfLiteDelegate {
// Build a 'null' delegate, with all the fields properly set to their default
// values.
TfLiteDelegate TfLiteDelegateCreate();
TfLiteDelegate TfLiteDelegateCreate(void);
#ifdef __cplusplus
} // extern "C"

View File

@@ -373,6 +373,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
return ParseReducer(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_REDUCE_ALL: {
return ParseReducer(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_REDUCE_MAX: {
return ParseReducer(op, error_reporter, allocator, builtin_data);
}
@@ -663,7 +667,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
return kTfLiteOk;
}
case BuiltinOperator_DELEGATE: {
// TODO(ycling): Revisit when supporting saving delegated models.
TF_LITE_REPORT_ERROR(error_reporter,
"DELEGATE op shouldn't exist in model.");
return kTfLiteError;
@@ -757,7 +760,8 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
*builtin_data = params.release();
return kTfLiteOk;
}
case BuiltinOperator_CONV_3D: {
case BuiltinOperator_CONV_3D:
case BuiltinOperator_CONV_3D_TRANSPOSE: {
auto params = safe_allocator.Allocate<TfLiteConv3DParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
if (const auto* conv3d_params = op->builtin_options_as_Conv3DOptions()) {
@@ -789,6 +793,21 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
*builtin_data = params.release();
return kTfLiteOk;
}
case BuiltinOperator_VAR_HANDLE: {
auto params = safe_allocator.Allocate<TfLiteVarHandleParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
params->container = nullptr;
params->shared_name = nullptr;
if (const auto* var_handle_params =
op->builtin_options_as_VarHandleOptions()) {
if (var_handle_params->container())
params->container = var_handle_params->container()->c_str();
if (var_handle_params->shared_name())
params->shared_name = var_handle_params->shared_name()->c_str();
}
*builtin_data = params.release();
return kTfLiteOk;
}
// Below are the ops with no builtin_data structure.
// TODO(aselle): Implement call in BuiltinOptions, but nullptrs are
// ok for now, since there is no call implementation either.
@@ -825,6 +844,9 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
case BuiltinOperator_HASHTABLE_FIND:
case BuiltinOperator_HASHTABLE_IMPORT:
case BuiltinOperator_HASHTABLE_SIZE:
case BuiltinOperator_READ_VARIABLE:
case BuiltinOperator_ASSIGN_VARIABLE:
case BuiltinOperator_BROADCAST_ARGS:
return kTfLiteOk;
case BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES:
return kTfLiteError;
@@ -1372,6 +1394,30 @@ TfLiteStatus ParseHardSwish(const Operator*, ErrorReporter*,
return kTfLiteOk;
}
TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data) {
CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
SafeBuiltinDataAllocator safe_allocator(allocator);
std::unique_ptr<TfLiteIfParams, SafeBuiltinDataAllocator::BuiltinDataDeleter>
params = safe_allocator.Allocate<TfLiteIfParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
const IfOptions* schema_params = op->builtin_options_as_IfOptions();
if (schema_params != nullptr) {
params->then_subgraph_index = schema_params->then_subgraph_index();
params->else_subgraph_index = schema_params->else_subgraph_index();
} else {
// TODO(b/157480169): We should either return kTfLiteError or fill in some
// reasonable defaults in the params struct. We are not doing so until we
// better understand the ramifications of changing the legacy behavior.
}
*builtin_data = params.release();
return kTfLiteOk;
}
TfLiteStatus ParseL2Normalization(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,

View File

@@ -181,6 +181,9 @@ TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseL2Normalization(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,

View File

@@ -30,8 +30,7 @@ TfLiteStatus GetRegistrationFromOpCode(
auto builtin_code = GetBuiltinCode(opcode);
int version = opcode->version();
if (builtin_code > BuiltinOperator_MAX ||
builtin_code < BuiltinOperator_MIN) {
if (builtin_code > BuiltinOperator_MAX) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Op builtin_code out of range: %d. Are you using old TFLite binary "

View File

@@ -46,6 +46,22 @@ class OpResolver {
}
virtual ~OpResolver() {}
private:
/// Returns true if this OpResolver may contain any "user defined" ops.
/// By "user defined" ops, we mean any op definitions other than those
/// contained in tflite::ops::builtin::BuiltinOpResolver.
///
/// If this method returns true, it doesn't necessarily mean that the
/// OpResolver contains a user-defined op, just that the absence of
/// user-defined ops can't be guaranteed.
///
/// Note that "user-defined" ops are not the same as "custom" ops;
/// BuiltinOpResolver may support certain "custom" ops, in addition to
/// "builtin" ops, and may not support all of the "builtin" op enum values.
virtual bool MayContainUserDefinedOps() const { return true; }
friend class OpResolverInternal;
};
// Handles the logic for converting between an OperatorCode structure extracted

View File

@@ -279,81 +279,125 @@ inline Integer FloorLog2(Integer n) {
}
}
// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
// softmax
// func - the function to build the LUT for (e.g exp(x))
// min,max - table limits
// table - pointer to buffer
// num - number of elements in the LUT
inline void gen_lut(double (*func)(double), double min, double max,
int16_t* table, const int num) {
// size of table should equal to num + 1
// last element only for slope calculation
double step = (max - min) / (num - 1);
double half_step = step / 2.0;
for (int i = 0; i < num - 1; i++) {
double sample_val = TfLiteRound(func(min + i * step) * 32768.0);
double midpoint_interp_val =
TfLiteRound((func(min + (i + 1) * step) * 32768.0 +
TfLiteRound(func(min + i * step) * 32768.0)) /
2.0);
double midpoint_val =
TfLiteRound(func(min + i * step + half_step) * 32768.0);
double midpoint_err = midpoint_interp_val - midpoint_val;
double bias = TfLiteRound(midpoint_err / 2.0);
table[i] = std::min<double>(std::max<double>(sample_val - bias, -32768.0),
32767.0);
}
table[num - 1] = std::min<double>(
std::max<double>(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
// The size of the LUT depends on the type of input. For int8 inputs a simple
// 256 entries LUT is used. For int16 inputs the high 9 bits are used for
// indexing and the 7 remaining bits are used for interpolation. We thus use a
// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry
// to interpolate the last value.
template <typename LutInT>
constexpr int lut_size() {
static_assert(std::is_same<LutInT, int8_t>::value ||
std::is_same<LutInT, int16_t>::value,
"Only LUTs with int8 or int16 inputs are supported.");
return std::is_same<LutInT, int8_t>::value ? 256 : 513;
}
// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
// softmax
// func - the function to build the LUT for (e.g exp(x))
// min,max - table limits
// table - pointer to buffer
// num - number of elements in the LUT
inline void gen_lut(float (*func)(float), float min, float max, int16_t* table,
const int num) {
// size of table should equal to num + 1
// last element only for slope calculation
float step = (max - min) / (num - 1);
float half_step = step / 2.0f;
for (int i = 0; i < num - 1; i++) {
float sample_val = TfLiteRound(func(min + i * step) * 32768.0f);
float midpoint_interp_val =
TfLiteRound((func(min + (i + 1) * step) * 32768.0f +
TfLiteRound(func(min + i * step) * 32768.0f)) /
2.0f);
float midpoint_val =
TfLiteRound(func(min + i * step + half_step) * 32768.0f);
float midpoint_err = midpoint_interp_val - midpoint_val;
float bias = TfLiteRound(midpoint_err / 2.0f);
table[i] = std::min<float>(std::max<float>(sample_val - bias, -32768.0f),
32767.0f);
}
table[num - 1] = std::min<float>(
std::max<float>(TfLiteRound(func(max) * 32768.0f), -32768.0f), 32767.0f);
// Generate a LUT for 'func' which can be used to approximate functions like
// exp, log, ...
//
// - func: the function to build the LUT for (e.g exp(x))
// - input_min, input_max: range of the func inputs
// - output_min, output_max: range of the func outputs
// - lut: pointer to the LUT table to fill, the table must be of size
// lut_size<LutInT>()
template <typename FloatT, typename LutInT, typename LutOutT>
inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max,
FloatT output_min, FloatT output_max, LutOutT* lut) {
static_assert(std::is_same<LutInT, int8_t>::value ||
std::is_same<LutInT, int16_t>::value,
"Only LUTs with int8 or int16 inputs are supported.");
static_assert(std::is_same<LutOutT, int8_t>::value ||
std::is_same<LutOutT, int16_t>::value,
"Only LUTs with int8 or int16 outputs are supported.");
static_assert(std::is_floating_point<FloatT>::value,
"FloatT must be a floating-point type.");
const int nb_steps = std::is_same<LutInT, int8_t>::value ? 256 : 512;
const FloatT step = (input_max - input_min) / nb_steps;
const FloatT half_step = step / 2;
const FloatT output_scaling_inv =
static_cast<FloatT>(std::numeric_limits<LutOutT>::max() -
std::numeric_limits<LutOutT>::min() + 1) /
(output_max - output_min);
const FloatT table_min =
static_cast<FloatT>(std::numeric_limits<LutOutT>::min());
const FloatT table_max =
static_cast<FloatT>(std::numeric_limits<LutOutT>::max());
for (int i = 0; i < nb_steps; i++) {
const FloatT val = func(input_min + i * step);
const FloatT val_midpoint = func(input_min + i * step + half_step);
const FloatT val_next = func(input_min + (i + 1) * step);
const FloatT sample_val = TfLiteRound(val * output_scaling_inv);
const FloatT midpoint_interp_val =
TfLiteRound((val_next * output_scaling_inv +
TfLiteRound(val * output_scaling_inv)) /
2);
const FloatT midpoint_val = TfLiteRound(val_midpoint * output_scaling_inv);
const FloatT midpoint_err = midpoint_interp_val - midpoint_val;
const FloatT bias = TfLiteRound(midpoint_err / 2);
lut[i] = static_cast<LutOutT>(std::min<FloatT>(
std::max<FloatT>(sample_val - bias, table_min), table_max));
}
// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
// 512 base value, lut[513] only for calculate slope
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
const bool with_extra_interpolation_value =
std::is_same<LutInT, int16_t>::value;
if (with_extra_interpolation_value) {
lut[nb_steps] = static_cast<LutOutT>(std::min<FloatT>(
std::max<FloatT>(TfLiteRound(func(input_max) * output_scaling_inv),
table_min),
table_max));
}
}
// LUT must have 513 values
template <typename LutOutT>
inline LutOutT lut_lookup_with_interpolation(int16_t value,
const LutOutT* lut) {
static_assert(std::is_same<LutOutT, int8_t>::value ||
std::is_same<LutOutT, int16_t>::value,
"Only LUTs with int8 or int16 outputs are supported.");
// 512 base values, lut[513] is only used to calculate the slope
const uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
assert(index < 512 && "LUT index out of range.");
int16_t offset = value & 0x7f;
const int16_t offset = value & 0x7f;
// base and slope are Q0.15
int16_t base = lut[index];
int16_t slope = lut[index + 1] - lut[index];
// Base and slope are Q0.x
const LutOutT base = lut[index];
const LutOutT slope = lut[index + 1] - lut[index];
// Q0.15 * Q0.7 = Q0.22
// Round and convert from Q0.22 to Q0.15
int32_t delta = (static_cast<int32_t>(slope) * offset + 64) >> 7;
// Q0.x * Q0.7 = Q0.(x + 7)
// Round and convert from Q0.(x + 7) to Q0.x
const int delta = (slope * offset + 64) >> 7;
// Q0.15 + Q0.15
return base + delta;
return static_cast<LutOutT>(base + delta);
}
// int16_t -> int16_t table lookup with interpolation
// LUT must have 513 values
inline int16_t lut_lookup(int16_t value, const int16_t* lut) {
return lut_lookup_with_interpolation(value, lut);
}
// int16_t -> int8_t table lookup with interpolation
// LUT must have 513 values
inline int8_t lut_lookup(int16_t value, const int8_t* lut) {
return lut_lookup_with_interpolation(value, lut);
}
// int8_t -> int8_t table lookup without interpolation
// LUT must have 256 values
inline int8_t lut_lookup(int8_t value, const int8_t* lut) {
return lut[128 + value];
}
// int8_t -> int16_t table lookup without interpolation
// LUT must have 256 values
inline int16_t lut_lookup(int8_t value, const int16_t* lut) {
return lut[128 + value];
}
// Table of sigmoid(i/24) at 0.16 format - 256 elements.
@@ -575,7 +619,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// InputIntegerBits - z_b_headroom - 0.25);
const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
static_cast<int32_t>(InputIntegerBits - z_a_headroom_plus_1),
31 - kAccumIntegerBits)),
shifted_quarter);
// z_b is treated like z_a, but premultiplying by sqrt(0.5).
@@ -585,7 +630,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
static_cast<int32_t>(InputIntegerBits - z_b_headroom),
31 - kAccumIntegerBits)),
shifted_quarter);
const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));
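To illustrate the reworked LUT API above: the table size now comes from `lut_size<LutInT>()`, `gen_lut` takes explicit input and output ranges, and `lut_lookup` dispatches on the input/output types. A hedged usage sketch, assuming these helpers live in namespace `tflite` as in upstream `common.h`:

```cpp
#include <cmath>
#include <cstdint>
#include "tensorflow/lite/kernels/internal/common.h"

// Build a 513-entry int16 LUT approximating exp(x) on [-10, 0], with the
// output range mapped to [0, 1], then look up one value with interpolation.
static float ExpF(float x) { return std::exp(x); }

void ExpLutSketch() {
  int16_t lut[tflite::lut_size<int16_t>()];  // 513 entries for int16 inputs
  tflite::gen_lut<float, int16_t, int16_t>(ExpF, /*input_min=*/-10.0f,
                                           /*input_max=*/0.0f,
                                           /*output_min=*/0.0f,
                                           /*output_max=*/1.0f, lut);
  // int16 -> int16 lookup with interpolation; the int16 input range is
  // mapped by gen_lut onto [input_min, input_max].
  const int16_t y = tflite::lut_lookup(static_cast<int16_t>(-1234), lut);
  (void)y;
}
```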

View File

@@ -20,8 +20,7 @@ limitations under the License.
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
defined(__ZEPHYR__)
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__)
#define TF_LITE_GLOBAL_STD_PREFIX
#else
#define TF_LITE_GLOBAL_STD_PREFIX std

View File

@@ -15,26 +15,6 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define USE_NEON
#include <arm_neon.h>
#endif
#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
#define USE_NEON
#include "NEON_2_SSE.h"
#endif
// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
// defined, PortableSomeFunc(args) otherwise.
#ifdef USE_NEON
// Always use Neon code
#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
#else
// No NEON available: Use Portable code
#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
#endif // defined(USE_NEON)
// TFLM does not need to utilize any Neon optimizations.
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_

View File

@@ -15,6 +15,8 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#include <type_traits>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
@@ -27,25 +29,14 @@ inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
T activation_min, activation_max;
GetActivationParams(params, &activation_min, &activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] + input2_data[i], params.quantized_activation_min,
params.quantized_activation_max);
}
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const float* input1_data,
const RuntimeShape& input2_shape, const float* input2_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
auto x = input1_data[i] + input2_data[i];
output_data[i] = ActivationFunctionWithMinMax(
x, params.float_activation_min, params.float_activation_max);
input1_data[i] + input2_data[i], activation_min, activation_max);
}
}
@@ -202,13 +193,12 @@ inline void Add(const ArithmeticParams& params,
}
}
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const float* input1_data,
const RuntimeShape& input2_shape,
const float* input2_data,
const RuntimeShape& output_shape,
float* output_data) {
template <typename T>
inline typename std::enable_if<!is_small_integer<T>::value, void>::type
BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -216,6 +206,9 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
T activation_min, activation_max;
GetActivationParams(params, &activation_min, &activation_max);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
@@ -232,51 +225,10 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
output_data[Offset(extended_output_shape, b, y, x, c)] =
ActivationFunctionWithMinMax(
ActivationFunctionWithMinMax<T>(
input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
input2_data[SubscriptToIndex(desc2, b, y, x, c)],
params.float_activation_min, params.float_activation_max);
}
}
}
}
}
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32_t* input1_data,
const RuntimeShape& input2_shape,
const int32_t* input2_data,
const RuntimeShape& output_shape,
int32_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
output_data[Offset(extended_output_shape, b, y, x, c)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
input2_data[SubscriptToIndex(desc2, b, y, x, c)],
params.quantized_activation_min,
params.quantized_activation_max);
activation_min, activation_max);
}
}
}
@@ -287,10 +239,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void BroadcastAdd4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
inline typename std::enable_if<is_small_integer<T>::value, void>::type
BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
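The rewrite above folds the separate float and int32 `Add`/`BroadcastAdd4DSlow` overloads into templates that pick their activation limits via `GetActivationParams` and are selected with `std::enable_if` on `is_small_integer<T>`. A small float sketch of the non-broadcast path; shapes and values are illustrative:

```cpp
#include <limits>
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/types.h"

// Element-wise float add through the unified template; the float activation
// bounds are read from ArithmeticParams by GetActivationParams().
void AddFloatSketch() {
  const float a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  const float b[4] = {0.5f, 0.5f, 0.5f, 0.5f};
  float out[4];

  tflite::ArithmeticParams params = {};
  params.float_activation_min = std::numeric_limits<float>::lowest();
  params.float_activation_max = std::numeric_limits<float>::max();

  const tflite::RuntimeShape shape({1, 1, 1, 4});
  tflite::reference_ops::Add(params, shape, a, shape, b, shape, out);
  // out is now {1.5f, 2.5f, 3.5f, 4.5f}
}
```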

View File

@@ -15,7 +15,10 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
#include "tensorflow/lite/kernels/internal/types.h"
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
@@ -36,6 +39,47 @@ inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
}
}
inline void AddN(const ArithmeticParams& params,
const RuntimeShape& input_shape, const size_t num_inputs,
const int8_t* const* input_data, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is negative input zero point. Activation tensors are
// asymmetric quantized so they span the full int8 range.
// All inputs should have same zero-point and scale, this is checked during
// Prepare stage.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
// All inputs and output should have the same shape, this is checked during
// Prepare stage.
const size_t size = input_shape.FlatSize();
for (size_t i = 0; i < size; ++i) {
// accumulate in scaled_x before clamping to avoid overflow
const int32_t x = params.input1_offset; // x = 0
const int32_t shifted_x = x * (1 << params.left_shift);
int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_x, params.input1_multiplier, params.input1_shift);
for (size_t j = 0; j < num_inputs; ++j) {
const int32_t y = params.input1_offset + input_data[j][i];
const int32_t shifted_y = y * (1 << params.left_shift);
int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_y, params.input1_multiplier, params.input1_shift);
scaled_x += scaled_y;
}
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
scaled_x, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<int8_t>(clamped_output);
}
}
} // namespace reference_ops
} // namespace tflite

View File

@@ -0,0 +1,275 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/tensor_utils_common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
namespace batch_matmul {
// Determine which dimension is the broadcast dimension.
inline int broadcast_dim(int lhs_dim, int rhs_dim) {
if (lhs_dim == rhs_dim) return lhs_dim;
if (lhs_dim == 1) return rhs_dim;
TFLITE_DCHECK_EQ(rhs_dim, 1);
return lhs_dim;
}
// Compute the "extent" for iterating on this dimension.
// If we are broadcasting, then don't advance (i.e return 0).
inline int extent(const RuntimeShape& shape, int x) {
if (shape.Dims(x) == 1) {
return 0;
}
int prod = 1;
for (int i = x + 1; i < shape.DimensionsCount(); ++i) {
prod *= shape.Dims(i);
}
return prod;
}
} // namespace batch_matmul
template <typename Ta, typename Tb, typename Tout>
inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data,
const RuntimeShape& rhs_shape, const Tb* rhs_data,
const RuntimeShape& output_shape, Tout* output_data) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const Ta* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const Tb* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const Ta* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const Tb* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const Ta* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const Tb* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
Tout* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
for (int i = 0; i < lhs_rows; ++i) {
Tout total = 0;
for (int k = 0; k < accum_depth; ++k) {
total += static_cast<Tout>(lhs_ptr2[accum_depth * i + k]) *
static_cast<Tout>(rhs_ptr2[j * accum_depth + k]);
}
int idx = lhs_rows * j + i;
out_ptr[idx] = total;
}
}
}
}
}
}
inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data,
const RuntimeShape& rhs_shape, const int8_t* rhs_data,
const float* scaling_factors,
const int32_t* input_offset, int32_t* row_sums,
const RuntimeShape& output_shape, float* output_data,
bool* compute_row_sums) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols;
const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols;
const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols;
const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows;
const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows;
const int woff_ext2 = lhs_ext2 == 0 ? 0 : lhs_rows;
if (!compute_row_sums || *compute_row_sums) {
int num_weights_matrices = 1;
for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) {
num_weights_matrices *= extended_lhs_shape.Dims(i);
}
tensor_utils::ReductionSumVector(
lhs_data, row_sums, num_weights_matrices * lhs_rows, accum_depth);
if (compute_row_sums) {
*compute_row_sums = false;
}
}
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0);
const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0);
const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1);
const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1);
const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1);
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2);
const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2);
const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2);
float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
const float batch_scaling_factor = scale_ptr2[j];
const float batch_offset = static_cast<float>(ioff_ptr2[j]);
for (int i = 0; i < lhs_rows; ++i) {
int32_t total = 0;
for (int k = 0; k < accum_depth; ++k) {
total +=
lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k];
}
int32_t row_sum = woff_ptr2[i];
total -= row_sum * batch_offset;
int idx = lhs_rows * j + i;
out_ptr[idx] += batch_scaling_factor * total;
}
}
}
}
}
}
template <typename T, typename AccumT>
inline void BatchMatMul(const FullyConnectedParams& params,
const RuntimeShape& lhs_shape, const T* lhs_data,
const RuntimeShape& rhs_shape, const T* rhs_data,
const RuntimeShape& output_shape, T* output_data) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const T* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
T* out_ptr = output_data +
((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
for (int i = 0; i < lhs_rows; ++i) {
AccumT total = 0;
for (int k = 0; k < accum_depth; ++k) {
AccumT lhs_val = lhs_ptr2[accum_depth * i + k];
AccumT rhs_val = rhs_ptr2[accum_depth * j + k];
total += (lhs_val + filter_offset) * (rhs_val + input_offset);
}
int32_t total_scaled = MultiplyByQuantizedMultiplier(
total, output_multiplier, output_shift);
total_scaled += output_offset;
total_scaled = std::max(total_scaled, output_activation_min);
total_scaled = std::min(total_scaled, output_activation_max);
const int idx = lhs_rows * j + i;
out_ptr[idx] = static_cast<T>(total_scaled);
}
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_

View File

@@ -0,0 +1,175 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
#include <algorithm>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
bool exclusive, bool reverse, T* output_data) {
const int32_t rank = shape.DimensionsCount();
TFLITE_DCHECK_GE(rank, 1);
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, rank);
size_t inner = 1;
size_t outer = 1;
size_t depth = 1;
for (int32_t i = 0; i < rank; i++) {
if (i < axis)
inner *= shape.Dims(i);
else if (i > axis)
outer *= shape.Dims(i);
else
depth = shape.Dims(i);
}
for (size_t outer_index = 0; outer_index < outer; outer_index++) {
size_t outer_index_adj;
if (reverse)
outer_index_adj = (outer - 1) - outer_index;
else
outer_index_adj = outer_index;
for (size_t inner_index = 0; inner_index < inner; inner_index++) {
T accumulator = 0;
size_t inner_index_adj;
if (reverse)
inner_index_adj = (inner - 1) - inner_index;
else
inner_index_adj = inner_index;
for (size_t depth_index = 0; depth_index < depth; depth_index++) {
size_t depth_index_adj;
if (reverse)
depth_index_adj = (depth - 1) - depth_index;
else
depth_index_adj = depth_index;
size_t index = outer_index_adj;
index += inner_index_adj * depth * outer;
index += depth_index_adj * outer;
if (exclusive) {
output_data[index] = accumulator;
accumulator += input_data[index];
} else {
accumulator += input_data[index];
output_data[index] = accumulator;
}
}
}
}
}
//
// Quantized INT8 CUMSUM
//
inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
const RuntimeShape& shape, int32_t axis, bool exclusive,
bool reverse, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is negative input zero point. Activation tensors are
// asymmetric quantized so they span the full int8 range.
// All inputs should have same zero-point and scale, this is checked during
// Prepare stage.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
const int32_t rank = shape.DimensionsCount();
TFLITE_DCHECK_GE(rank, 1);
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, rank);
size_t inner = 1;
size_t outer = 1;
size_t depth = 1;
for (int32_t i = 0; i < rank; i++) {
if (i < axis)
inner *= shape.Dims(i);
else if (i > axis)
outer *= shape.Dims(i);
else
depth = shape.Dims(i);
}
for (size_t outer_index = 0; outer_index < outer; outer_index++) {
size_t outer_index_adj;
if (reverse)
outer_index_adj = (outer - 1) - outer_index;
else
outer_index_adj = outer_index;
for (size_t inner_index = 0; inner_index < inner; inner_index++) {
int32_t accumulator = params.input1_offset; // accumulator = 0
accumulator *= (1 << params.left_shift);
accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
accumulator, params.input1_multiplier, params.input1_shift);
size_t inner_index_adj;
if (reverse)
inner_index_adj = (inner - 1) - inner_index;
else
inner_index_adj = inner_index;
for (size_t depth_index = 0; depth_index < depth; depth_index++) {
size_t depth_index_adj;
if (reverse)
depth_index_adj = (depth - 1) - depth_index;
else
depth_index_adj = depth_index;
size_t index = outer_index_adj;
index += inner_index_adj * depth * outer;
index += depth_index_adj * outer;
const int32_t y = params.input1_offset + input_data[index];
const int32_t shifted_y = y * (1 << params.left_shift);
const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_y, params.input1_multiplier, params.input1_shift);
int32_t scaled_output;
if (exclusive) {
scaled_output = accumulator;
accumulator += scaled_y;
} else {
accumulator += scaled_y;
scaled_output = accumulator;
}
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
scaled_output, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[index] = static_cast<int8_t>(clamped_output);
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
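A minimal standalone sketch of the exclusive/inclusive accumulator handling above, specialized to the last axis of a row-major 2x3 array (the helper name and values are illustrative, not the TFLite API):

#include <cstdio>

// Cumulative sum along the last axis of a [rows x cols] row-major array.
static void CumSumLastAxis(const int* in, int rows, int cols, bool exclusive, int* out) {
  for (int r = 0; r < rows; ++r) {
    int acc = 0;
    for (int c = 0; c < cols; ++c) {
      const int idx = r * cols + c;
      if (exclusive) { out[idx] = acc; acc += in[idx]; }
      else           { acc += in[idx]; out[idx] = acc; }
    }
  }
}

int main() {
  const int x[6] = {1, 2, 3, 4, 5, 6};                // shape [2, 3]
  int inc[6], exc[6];
  CumSumLastAxis(x, 2, 3, /*exclusive=*/false, inc);  // {1,3,6, 4,9,15}
  CumSumLastAxis(x, 2, 3, /*exclusive=*/true,  exc);  // {0,1,3, 0,4,9}
  for (int i = 0; i < 6; ++i) std::printf("%d %d\n", inc[i], exc[i]);
}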

View File

@@ -0,0 +1,79 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int input_depth = input_shape.Dims(3);
const int input_width = input_shape.Dims(2);
const int input_height = input_shape.Dims(1);
const int input_batch = input_shape.Dims(0);
const int output_depth = output_shape.Dims(3);
const int output_width = output_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_batch = output_shape.Dims(0);
const int32_t block_size = op_params.block_size;
TFLITE_DCHECK_EQ(input_width * block_size, output_width);
TFLITE_DCHECK_EQ(input_height * block_size, output_height);
TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size);
TFLITE_DCHECK_EQ(input_batch, output_batch);
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_h = 0; out_h < output_height; ++out_h) {
for (int out_w = 0; out_w < output_width; ++out_w) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
const int in_d =
out_d + ((out_h % block_size) * block_size + out_w % block_size) *
output_depth;
const int in_w = out_w / block_size;
const int in_h = out_h / block_size;
const int in_b = out_b;
const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
const int output_index =
Offset(output_shape, out_b, out_h, out_w, out_d);
output_data[output_index] = input_data[input_index];
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
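A quick worked example of the in_d / in_h / in_w mapping above, assuming NHWC data, block_size = 2 and a [1,1,1,4] input rearranged into a [1,2,2,1] output (values are illustrative):

#include <cstdio>

int main() {
  const int block_size = 2;
  const int output_depth = 1;                 // input_depth / (block_size * block_size)
  const int input[4] = {10, 20, 30, 40};      // shape [1,1,1,4], NHWC
  int output[2][2] = {};                      // shape [1,2,2,1]
  for (int out_h = 0; out_h < 2; ++out_h) {
    for (int out_w = 0; out_w < 2; ++out_w) {
      const int out_d = 0;
      const int in_d = out_d + ((out_h % block_size) * block_size +
                                out_w % block_size) * output_depth;
      output[out_h][out_w] = input[in_d];     // in_b == in_h == in_w == 0 here
    }
  }
  std::printf("%d %d\n%d %d\n", output[0][0], output[0][1],
              output[1][0], output[1][1]);    // 10 20 / 30 40
}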

View File

@@ -1,239 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void DivCheckArithmeticParams(const ArithmeticParams& params) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is negative input zero point. Activation tensors are
// asymmetric quantized so they span the full int8 range.
constexpr int32_t max_value =
static_cast<int32_t>(std::numeric_limits<T>::max());
TFLITE_DCHECK_GE(params.input1_offset, -max_value);
TFLITE_DCHECK_LE(params.input1_offset, max_value);
TFLITE_DCHECK_GE(params.input2_offset, -max_value);
TFLITE_DCHECK_LE(params.input2_offset, max_value);
TFLITE_DCHECK_GE(params.output_offset, -max_value);
TFLITE_DCHECK_LE(params.output_offset, max_value);
}
// Element-wise div that can often be used for inner loop of broadcast Div as
// well as the non-broadcast Div.
template <typename T>
inline void DivElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
DivCheckArithmeticParams<T>(params);
for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
TFLITE_DCHECK_NE(input2_val, 0);
int recip_shift;
const int32_t input2_inv =
(input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
: -GetReciprocal(-input2_val, 31, &recip_shift);
const int headroom = CountLeadingSignBits(input1_val);
const int32_t unscaled_quotient =
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
headroom);
const int total_shift = params.output_shift - recip_shift - headroom;
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplierSmallerThanOneExp(
unscaled_quotient, params.output_multiplier, total_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<T>(clamped_output);
}
}
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int8_t* input1_data,
const RuntimeShape& input2_shape, const int8_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
}
template <typename T, int N = 5>
inline void BroadcastDivSlowQuantized(
const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape,
const T* input1_data, const RuntimeShape& unextended_input2_shape,
const T* input2_data, const RuntimeShape& unextended_output_shape,
T* output_data) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
DivCheckArithmeticParams<T>(params);
auto div_func = [&](int indexes[N]) {
const int32_t input1_val =
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
const int32_t input2_val =
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
TFLITE_DCHECK_NE(input2_val, 0);
int recip_shift;
const int32_t input2_inv =
(input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
: -GetReciprocal(-input2_val, 31, &recip_shift);
const int headroom = CountLeadingSignBits(input1_val);
const int32_t unscaled_quotient =
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
headroom);
const int total_shift = params.output_shift - recip_shift - headroom;
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplierSmallerThanOneExp(
unscaled_quotient, params.output_multiplier, total_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[SubscriptToIndex(output_desc, indexes)] =
static_cast<T>(clamped_output);
};
NDOpsHelper<N>(output_desc, div_func);
}
template <int N = 5>
inline void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const uint8_t* input1_data,
const RuntimeShape& unextended_input2_shape,
const uint8_t* input2_data,
const RuntimeShape& unextended_output_shape,
uint8_t* output_data) {
BroadcastDivSlowQuantized<uint8_t, N>(
params, unextended_input1_shape, input1_data, unextended_input2_shape,
input2_data, unextended_output_shape, output_data);
}
template <int N = 5>
inline void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const int8_t* input1_data,
const RuntimeShape& unextended_input2_shape,
const int8_t* input2_data,
const RuntimeShape& unextended_output_shape,
int8_t* output_data) {
BroadcastDivSlowQuantized<int8_t, N>(
params, unextended_input1_shape, input1_data, unextended_input2_shape,
input2_data, unextended_output_shape, output_data);
}
// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
// dimensionality if the runtime code does a single loop over one dimension
// that handles broadcasting as the base case. The code generator would then
// generate max(D1, D2) nested for loops.
template <typename T, int N = 5>
void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const T* input1_data,
const RuntimeShape& unextended_input2_shape,
const T* input2_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
T output_activation_min;
T output_activation_max;
GetActivationParams(params, &output_activation_min, &output_activation_max);
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest
// stride, typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
auto div_func = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, indexes)] /
input2_data[SubscriptToIndex(desc2, indexes)],
output_activation_min, output_activation_max);
};
NDOpsHelper<N>(output_desc, div_func);
}
template <typename T>
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
T output_activation_min;
T output_activation_max;
GetActivationParams(params, &output_activation_min, &output_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] / input2_data[i], output_activation_min,
output_activation_max);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_

View File

@@ -0,0 +1,35 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
#include <cmath>
#include <functional>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
T FloorDiv(T input1, T input2) {
return std::floor(std::divides<double>()(static_cast<double>(input1),
static_cast<double>(input2)));
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
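A small standalone illustration of why FloorDiv is not plain C++ integer division: '/' truncates toward zero, while FloorDiv rounds toward negative infinity. The sketch below re-instantiates the same formula for int:

#include <cmath>
#include <cstdio>
#include <functional>

template <typename T>
T FloorDivSketch(T a, T b) {
  return std::floor(std::divides<double>()(static_cast<double>(a),
                                           static_cast<double>(b)));
}

int main() {
  // C++ '/' truncates toward zero: 7 / -2 == -3.
  // Floor division rounds toward negative infinity: FloorDiv(7, -2) == -4.
  std::printf("trunc: %d, floor: %d\n", 7 / -2, FloorDivSketch(7, -2));
}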

View File

@@ -0,0 +1,44 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
#include <cmath>
#include <functional>
namespace tflite {
namespace reference_ops {
template <typename T>
T FloorMod(T input1, T input2) {
struct FloatMod {
float operator()(const float lhs, const float rhs) const {
return std::fmod(lhs, rhs);
}
};
using ModFunc = typename std::conditional<std::is_integral<T>::value,
std::modulus<T>, FloatMod>::type;
ModFunc mod_func;
T trunc_mod = mod_func(input1, input2);
return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0))
? (trunc_mod + input2)
: trunc_mod;
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
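The sign correction above makes the result follow the divisor's sign, matching Python-style modulo. A standalone int sketch (FloorModSketch is an illustrative re-statement, not the library template):

#include <cstdio>

// Adjust the truncated remainder when its sign differs from the divisor's.
static int FloorModSketch(int a, int b) {
  const int trunc_mod = a % b;
  return (trunc_mod != 0 && (b < 0) != (trunc_mod < 0)) ? trunc_mod + b : trunc_mod;
}

int main() {
  // C++ '%' keeps the dividend's sign: 7 % -3 == 1, -7 % 3 == -1.
  // Floor-mod keeps the divisor's sign: FloorMod(7, -3) == -2, FloorMod(-7, 3) == 2.
  std::printf("%d %d\n", FloorModSketch(7, -3), FloorModSketch(-7, 3));
}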

View File

@@ -21,7 +21,7 @@ limitations under the License.
namespace tflite {
namespace reference_integer_ops {
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
@@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params,
filter_count++;
}
}
+if (filter_count == 0) return false;
// Round to the closest integer value.
acc = acc > 0 ? (acc + filter_count / 2) / filter_count
: (acc - filter_count / 2) / filter_count;
@@ -77,6 +78,7 @@ inline void AveragePool(const PoolParams& params,
}
}
}
+return true;
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
@@ -136,7 +138,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
}
}
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int16_t* input_data,
const RuntimeShape& output_shape,
@@ -182,6 +184,7 @@ inline void AveragePool(const PoolParams& params,
filter_count++;
}
}
+if (filter_count == 0) return false;
// Round to the closest integer value.
acc = acc > 0 ? (acc + filter_count / 2) / filter_count
: (acc - filter_count / 2) / filter_count;
@@ -193,6 +196,7 @@ inline void AveragePool(const PoolParams& params,
}
}
}
+return true;
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
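The hunk above changes the integer AveragePool kernels from void to bool so that a pooling window containing no valid input cells (filter_count == 0, possible with certain padding/stride combinations) is reported instead of causing a division by zero. A self-contained sketch of the guarded rounded-average step (RoundedAverage is an assumed helper, not tfmicro code):

#include <algorithm>
#include <cstdio>

// If no input cells fall inside the (possibly padded) window, report failure;
// otherwise round to the nearest integer and clamp to the activation range.
static bool RoundedAverage(int acc, int filter_count, int act_min, int act_max,
                           int* out) {
  if (filter_count == 0) return false;
  const int avg = acc > 0 ? (acc + filter_count / 2) / filter_count
                          : (acc - filter_count / 2) / filter_count;
  *out = std::min(act_max, std::max(act_min, avg));
  return true;
}

int main() {
  int out = 0;
  std::printf("%d %d\n", RoundedAverage(-7, 2, -128, 127, &out), out);  // 1 -4
  std::printf("%d\n", RoundedAverage(0, 0, -128, 127, &out));           // 0 (failure)
}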

View File

@@ -0,0 +1,256 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
#include <algorithm>
#include <cstddef>
#include <limits>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
inline void LogSoftmax(const SoftmaxParams& params,
const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
// Find max element value which we'll use to ensure numerical stability
// taking advantage of the following equality:
// log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
float max = std::numeric_limits<float>::lowest();
for (int c = 0; c < depth; ++c) {
max = std::max(max, input_data[i * depth + c]);
}
// Compute sum.
float sum = 0.f;
for (int c = 0; c < depth; ++c) {
sum += std::exp(input_data[i * depth + c] - max);
}
// Compute result.
const float log_sum = std::log(sum);
for (int c = 0; c < depth; ++c) {
output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
}
}
}
inline void LogSoftmax(const SoftmaxParams& params,
const RuntimeShape& input_shape,
const uint8_t* input_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
const int32_t input_multiplier = params.input_multiplier;
const int32_t input_left_shift = params.input_left_shift;
const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
const int32_t reverse_scaling_right_shift =
params.reverse_scaling_right_shift;
const int diff_min = params.diff_min;
// The representation chosen for the input to the exp() function is Q5.26.
// We need to leave extra space since values that we skip might be as large
// as -32 before multiplying by input_beta_multiplier, and therefore as
// large as -16 afterwards. Note that exp(-8) is definitely not
// insignificant to accumulation, but exp(-16) definitely is.
static constexpr int kScaledDiffIntegerBits = 5;
static constexpr int kAccumulationIntegerBits = 12;
static constexpr int kOutputIntegerBits = 4;
using FixedPointScaledDiff =
gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
using FixedPointAccum =
gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
uint8_t max_in_row = 0;
for (int c = 0; c < depth; ++c) {
max_in_row = std::max(max_in_row, input_data[i * depth + c]);
}
FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
for (int c = 0; c < depth; ++c) {
int32_t input_diff =
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
if (input_diff >= diff_min) {
const int32_t input_diff_rescaled =
MultiplyByQuantizedMultiplierGreaterThanOne(
input_diff, input_multiplier, input_left_shift);
const FixedPointScaledDiff scaled_diff_f8 =
FixedPointScaledDiff::FromRaw(input_diff_rescaled);
sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
exp_on_negative_values(scaled_diff_f8));
}
}
const int32_t fixed_log_sum_of_exps =
log_x_for_x_greater_than_or_equal_to_1<kScaledDiffIntegerBits>(
sum_of_exps)
.raw();
// rescaled_diff_min is smallest representable in
// Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the
// log-sub-exps that will be subtracted in the loop.
//
// The thresholds diff_min, etc are negative.
const int rescaled_diff_min =
fixed_log_sum_of_exps + std::numeric_limits<int32_t>::lowest();
const int adjusted_diff_min =
std::max(static_cast<int32_t>(
diff_min - 1), // Note use of > below instead of >= above.
MultiplyByQuantizedMultiplierSmallerThanOneExp(
rescaled_diff_min, reverse_scaling_divisor,
-reverse_scaling_right_shift));
for (int c = 0; c < depth; ++c) {
int32_t input_diff =
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
if (input_diff > adjusted_diff_min) {
const int32_t input_diff_rescaled =
MultiplyByQuantizedMultiplierGreaterThanOne(
input_diff, input_multiplier, input_left_shift);
int32_t unsat_output =
gemmlowp::RoundingDivideByPOT(
(input_diff_rescaled - fixed_log_sum_of_exps),
31 - kScaledDiffIntegerBits - kOutputIntegerBits) +
255;
output_data[i * depth + c] = static_cast<uint8_t>(
std::max(std::min(unsat_output, static_cast<int32_t>(255)),
static_cast<int32_t>(0)));
} else {
// Set output to smallest value.
output_data[i * depth + c] = 0;
}
}
}
}
template <typename T>
inline void LogSoftmaxQuantized(const SoftmaxParams& params,
const size_t outer_size, const size_t depth,
const RuntimeShape& input_shape,
const T* input_data,
const RuntimeShape& output_shape,
T* output_data) {
const int32_t input_multiplier = params.input_multiplier;
const int32_t input_left_shift = params.input_left_shift;
const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
const int32_t reverse_scaling_right_shift =
params.reverse_scaling_right_shift;
const int diff_min = params.diff_min;
static constexpr T kMinT8 = std::numeric_limits<T>::min();
static constexpr T kMaxT8 = std::numeric_limits<T>::max();
static constexpr int32_t kMinInt32 = std::numeric_limits<int32_t>::min();
// All IntegerBits must agree with Prepare function.
// Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible.
static constexpr int kInputIntegerBits = 5;
static constexpr int kAccumulationIntegerBits = 12;
static constexpr int kOutputIntegerBits = 4;
using F5 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
using F12 = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
for (size_t outer_index = 0; outer_index < outer_size; ++outer_index) {
T max_in_row = kMinT8;
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
max_in_row =
std::max(max_in_row, input_data[outer_index * depth + inner_index]);
}
// Accumulator "sum_of_exps_in_q12" is safe from overflowing in 2^12 steps.
F12 sum_of_exps_in_q12 = F12::FromRaw(0);
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input_diff =
static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
max_in_row;
if (input_diff >= diff_min) {
const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
input_diff, input_multiplier, input_left_shift);
sum_of_exps_in_q12 =
sum_of_exps_in_q12 +
gemmlowp::Rescale<kAccumulationIntegerBits>(
exp_on_negative_values(F5::FromRaw(input_diff_in_q5)));
}
}
const int32_t log_sum_of_exps_in_q5 =
log_x_for_x_greater_than_or_equal_to_1<kInputIntegerBits>(
sum_of_exps_in_q12)
.raw();
// Potentially reduced the valid range. shifted_log_sum_of_exps_in_q5 is
// smallest representable in Q5.26 plus the log_sum_of_exps.
const int32_t shifted_log_sum_of_exps_in_q5 =
log_sum_of_exps_in_q5 + kMinInt32;
const int32_t adjusted_diff_min =
std::max(static_cast<int32_t>(diff_min - 1),
MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5,
reverse_scaling_divisor,
-reverse_scaling_right_shift));
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input_diff =
static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
max_in_row;
// Note use of > below instead of >= above.
if (input_diff > adjusted_diff_min) {
const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
input_diff, input_multiplier, input_left_shift);
// Rescale and downcast.
int32_t output_in_q27 =
gemmlowp::RoundingDivideByPOT(
(input_diff_in_q5 - log_sum_of_exps_in_q5),
31 - kInputIntegerBits - kOutputIntegerBits) +
kMaxT8;
output_in_q27 =
std::max(std::min(output_in_q27, static_cast<int32_t>(kMaxT8)),
static_cast<int32_t>(kMinT8));
output_data[outer_index * depth + inner_index] =
static_cast<T>(output_in_q27);
} else {
output_data[outer_index * depth + inner_index] = kMinT8;
}
}
}
}
inline void LogSoftmax(const SoftmaxParams& params, const size_t outer_size,
const size_t depth, const RuntimeShape& input_shape,
const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
LogSoftmaxQuantized(params, outer_size, depth, input_shape, input_data,
output_shape, output_data);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
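The float LogSoftmax above relies on the identity log(exp(x_i)/sum_j exp(x_j)) = (x_i - m) - log(sum_j exp(x_j - m)) with m = max_j x_j, which keeps every exp() argument non-positive. A minimal standalone check (values are illustrative):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float x[3] = {1.0f, 2.0f, 3.0f};
  float m = x[0];
  for (float v : x) m = std::max(m, v);
  float sum = 0.f;
  for (float v : x) sum += std::exp(v - m);
  const float log_sum = std::log(sum);
  for (float v : x)
    std::printf("%f ", v - m - log_sum);   // approx -2.4076 -1.4076 -0.4076
  std::printf("\n");
}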

View File

@@ -51,7 +51,7 @@ inline void Mul(const ArithmeticParams& params,
GetActivationParams(params, &output_activation_min, &output_activation_max);
const int flat_size =
-MatchingFlatSize(input1_shape, input2_shape, output_shape);
+MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] * input2_data[i], output_activation_min,
@@ -66,7 +66,7 @@ inline void Mul(const ArithmeticParams& params,
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
-MatchingFlatSize(input1_shape, input2_shape, output_shape);
+MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape);
MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}

View File

@@ -24,8 +24,8 @@ namespace tflite {
namespace reference_ops {
-// TFLite Pad supports activation tensors with up to 4 dimensions.
-constexpr int PadKernelMaxDimensionCount() { return 4; }
+// TFLite Pad supports activation tensors with up to 5 dimensions.
+constexpr int PadKernelMaxDimensionCount() { return 5; }
// There are two versions of pad: Pad and PadV2. In PadV2 there is a second
// scalar input that provides the padding value. Therefore pad_value_ptr can be
@@ -46,8 +46,8 @@ inline void PadImpl(const tflite::PadParams& op_params,
TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
-// Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
-// pad them to 4 dims (yes, we are "padding the padding").
+// Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
+// pad them to 5 dims (yes, we are "padding the padding").
int left_padding_copy[PadKernelMaxDimensionCount()];
for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
left_padding_copy[i] = 0;
@@ -67,30 +67,36 @@ inline void PadImpl(const tflite::PadParams& op_params,
}
const int output_batch = ext_output_shape.Dims(0);
-const int output_height = ext_output_shape.Dims(1);
-const int output_width = ext_output_shape.Dims(2);
-const int output_depth = ext_output_shape.Dims(3);
+const int output_plane = ext_output_shape.Dims(1);
+const int output_height = ext_output_shape.Dims(2);
+const int output_width = ext_output_shape.Dims(3);
+const int output_depth = ext_output_shape.Dims(4);
const int left_b_padding = left_padding_copy[0];
-const int left_h_padding = left_padding_copy[1];
-const int left_w_padding = left_padding_copy[2];
-const int left_d_padding = left_padding_copy[3];
+const int left_p_padding = left_padding_copy[1];
+const int left_h_padding = left_padding_copy[2];
+const int left_w_padding = left_padding_copy[3];
+const int left_d_padding = left_padding_copy[4];
const int right_b_padding = right_padding_copy[0];
-const int right_h_padding = right_padding_copy[1];
-const int right_w_padding = right_padding_copy[2];
-const int right_d_padding = right_padding_copy[3];
+const int right_p_padding = right_padding_copy[1];
+const int right_h_padding = right_padding_copy[2];
+const int right_w_padding = right_padding_copy[3];
+const int right_d_padding = right_padding_copy[4];
const T pad_value = *pad_value_ptr;
const T* in_ptr = input_data;
T* out_ptr = output_data;
for (int out_b = 0; out_b < output_batch; ++out_b) {
+for (int out_p = 0; out_p < output_plane; ++out_p) {
for (int out_h = 0; out_h < output_height; ++out_h) {
for (int out_w = 0; out_w < output_width; ++out_w) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
if (out_b < left_b_padding ||
out_b >= output_batch - right_b_padding ||
+out_p < left_p_padding ||
+out_p >= output_plane - right_p_padding ||
out_h < left_h_padding ||
out_h >= output_height - right_h_padding ||
out_w < left_w_padding ||
@@ -106,6 +112,7 @@ inline void PadImpl(const tflite::PadParams& op_params,
}
}
}
}
template <typename T, typename P>
inline void Pad(const tflite::PadParams& op_params,
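The hunk above raises the Pad reference kernel from 4 to 5 supported dimensions (batch, plane, height, width, depth); lower-rank callers keep working because the padding spec itself is padded with leading zeros. A rough standalone sketch of that step (rank and values below are assumptions, not the library code):

#include <cstdio>

// "Padding the padding": extend a rank-R padding spec to the kernel's fixed
// 5-entry layout by filling the leading (5 - R) entries with zero.
int main() {
  const int kMaxDims = 5;
  const int rank = 3;
  const int left_padding[rank] = {1, 0, 2};   // caller-supplied, rank-3
  int left_padding_copy[kMaxDims] = {0, 0, 0, 0, 0};
  for (int i = 0; i < rank; ++i)
    left_padding_copy[i + (kMaxDims - rank)] = left_padding[i];
  for (int v : left_padding_copy) std::printf("%d ", v);  // 0 0 1 0 2
  std::printf("\n");
}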

View File

@@ -23,7 +23,7 @@ limitations under the License.
namespace tflite {
namespace reference_ops {
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
@@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params,
filter_count++;
}
}
+if (filter_count == 0) return false;
const float average = total / filter_count;
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
ActivationFunctionWithMinMax(average, params.float_activation_min,
@@ -74,9 +75,10 @@ inline void AveragePool(const PoolParams& params,
}
}
}
+return true;
}
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const uint8_t* input_data,
const RuntimeShape& output_shape,
@@ -122,6 +124,7 @@ inline void AveragePool(const PoolParams& params,
filter_count++;
}
}
+if (filter_count == 0) return false;
acc = (acc + filter_count / 2) / filter_count;
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
@@ -131,6 +134,7 @@ inline void AveragePool(const PoolParams& params,
}
}
}
+return true;
}
inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,

View File

@@ -0,0 +1,774 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <utility>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif
namespace tflite {
namespace tensor_utils {
namespace {
const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
} // namespace
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min_value,
float* max_value, float* scaling_factor) {
auto minmax = std::minmax_element(values, values + size);
*min_value = *minmax.first;
*max_value = *minmax.second;
PortableSymmetricQuantizeFloats(values, size, quantized_values, *min_value,
*max_value, scaling_factor);
}
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor) {
const int32_t kScale = 127;
const float range = std::max(std::abs(min_value), std::abs(max_value));
if (range == 0) {
memset(quantized_values, 0, size * sizeof(int8_t));
*scaling_factor = 1;
return;
}
*scaling_factor = range / kScale;
const float scaling_factor_inv = kScale / range;
for (int i = 0; i < size; ++i) {
const int32_t quantized_value =
static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = static_cast<int8_t>(
std::min(kScale, std::max(-kScale, quantized_value)));
}
}
void PortableAsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values,
float* scaling_factor, int32_t* offset) {
const int32_t kMinScale = -128;
const int32_t kMaxScale = 127;
const double qmin_double = kMinScale;
const double qmax_double = kMaxScale;
const auto minmax = std::minmax_element(values, values + size);
const double rmin = std::fmin(0, *minmax.first);
const double rmax = std::fmax(0, *minmax.second);
if (rmin == rmax) {
memset(quantized_values, 0, size * sizeof(int8_t));
*scaling_factor = 1;
*offset = 0;
return;
} else {
double scale = (rmax - rmin) / (qmax_double - qmin_double);
const double zero_point_from_min = qmin_double - rmin / scale;
const double zero_point_from_max = qmax_double - rmax / scale;
const double zero_point_from_min_error =
std::abs(qmin_double) + std::abs(rmin / scale);
const double zero_point_from_max_error =
std::abs(qmax_double) + std::abs(rmax / scale);
const double zero_point_double =
zero_point_from_min_error < zero_point_from_max_error
? zero_point_from_min
: zero_point_from_max;
int8_t nudged_zero_point = 0;
if (zero_point_double <= qmin_double) {
nudged_zero_point = kMinScale;
} else if (zero_point_double >= qmax_double) {
nudged_zero_point = kMaxScale;
} else {
nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
}
*scaling_factor = scale;
*offset = nudged_zero_point;
}
const float scaling_factor_inv = 1.0f / *scaling_factor;
for (int i = 0; i < size; ++i) {
const int32_t quantized_value = static_cast<int32_t>(
TfLiteRound(*offset + values[i] * scaling_factor_inv));
quantized_values[i] =
std::min(kMaxScale, std::max(kMinScale, quantized_value));
}
}
void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
int m_rows, int m_cols,
const float* vector,
int n_batch, float* result) {
float* result_in_batch = result;
for (int b = 0; b < n_batch; b++) {
const float* matrix_ptr = matrix;
for (int r = 0; r < m_rows; r++) {
float dot_prod = 0.0f;
const float* vector_in_batch = vector + b * m_cols;
for (int c = 0; c < m_cols; c++) {
dot_prod += *matrix_ptr++ * *vector_in_batch++;
}
*result_in_batch += dot_prod;
++result_in_batch;
}
}
}
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result) {
for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
const float batch_scaling_factor = scaling_factors[batch];
// Get the address of the first row.
const int8_t* row_ptr = matrix;
for (int row = 0; row < m_rows; ++row) {
// Initialize the dot product sum for the row to 0.
int32_t dotprod = 0;
#if defined(__GNUC__)
// Prefetch the row to cache.
__builtin_prefetch(row_ptr, 0 /* prefetch for read */,
3 /* temporal locality */);
#endif
for (int col = 0; col < m_cols; ++col, ++row_ptr) {
dotprod += (*row_ptr) * (vectors[col]);
} // for col
*result += dotprod * batch_scaling_factor;
++result;
} // for row
} // for batch
}
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result, const float* per_channel_scale,
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
bool* compute_row_sums, CpuBackendContext* context) {
if (input_offset == nullptr) {
PortableMatrixBatchVectorMultiplyAccumulate(
matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result);
return;
}
if (!compute_row_sums || *compute_row_sums) {
PortableReductionSumVector(matrix, row_sums, m_rows, m_cols);
if (compute_row_sums) {
*compute_row_sums = false;
}
}
for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
const float batch_scaling_factor = scaling_factors[batch];
const int32_t batch_offset = input_offset[batch];
const int8_t* row_ptr = matrix;
for (int row = 0; row < m_rows; ++row) {
int32_t dotprod = 0;
float scale = batch_scaling_factor;
if (per_channel_scale) {
scale *= per_channel_scale[row];
}
#if defined(__GNUC__)
// Prefetch the row to cache.
__builtin_prefetch(row_ptr, 0 /* prefetch for read */,
3 /* temporal locality */);
#endif
for (int col = 0; col < m_cols; ++col, ++row_ptr) {
dotprod += (*row_ptr) * vectors[col];
} // for col
dotprod -= row_sums[row] * batch_offset;
*result += dotprod * scale;
++result;
} // for row
} // for batch
}
void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
const int kBlockSize = 4;
TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
for (int batch = 0; batch < n_batch; batch++) {
const float* matrix_ptr = matrix;
for (int row = 0; row < m_rows; row++) {
float dot_prod = 0.0f;
const float* vector_in_batch = vector + batch * m_cols;
for (int i = segments[row]; i < segments[row + 1]; i++) {
const int block_start_index = indices[i] * kBlockSize;
const float* vector_block_in_batch_ptr =
vector_in_batch + block_start_index;
for (int c = 0; c < kBlockSize; c++) {
dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
}
}
result[batch * m_rows + row] += dot_prod;
}
}
}
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result) {
const int kBlockSize = 16;
TFLITE_DCHECK_EQ( // NOLINT
m_cols % kBlockSize, 0);
for (int batch = 0; batch < n_batch; batch++) {
const float* matrix_ptr = matrix;
const uint8_t* ledger_ptr = ledger;
for (int row = 0; row < m_rows; row++) {
float dot_prod = 0.0f;
int num_nonzero_blocks = *ledger_ptr++;
if (num_nonzero_blocks > 0) {
const float* vector_in_batch = vector + batch * m_cols;
for (int i = 0; i < num_nonzero_blocks; i++) {
const int block_start_index = *ledger_ptr++ * kBlockSize;
const float* vector_block_in_batch_ptr =
vector_in_batch + block_start_index;
for (int c = 0; c < kBlockSize; c++) {
dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
}
}
}
result[batch * m_rows + row] += dot_prod;
}
}
}
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
const int m_cols, const int8_t* __restrict__ vectors,
const float* scaling_factors, int n_batch, float* __restrict__ result) {
static const int kBlockSize = 16;
TFLITE_DCHECK_EQ( // NOLINT
m_cols % kBlockSize, 0);
for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
const float batch_scaling_factor = scaling_factors[batch];
const uint8_t* ledger_ptr = ledger;
// Get the address of the first row.
const int8_t* row_ptr = matrix;
for (int row = 0; row < m_rows; ++row) {
// Initialize the dot product sum for the row to 0.
int32_t dotprod = 0;
#if defined(__GNUC__)
// Prefetch the row to cache.
__builtin_prefetch(row_ptr, 0 /* prefetch for read */,
3 /* temporal locality */);
#endif
int num_nonzero_blocks = *ledger_ptr++;
for (int i = 0; i < num_nonzero_blocks; i++) {
const int block_start_index = *ledger_ptr++ * kBlockSize;
const int8_t* vector_block_ptr = vectors + block_start_index;
for (int c = 0; c < kBlockSize; c++) {
dotprod += (*row_ptr++) * (*vector_block_ptr++);
} // for block
} // for num_nonzero_blocks
result[batch * m_rows + row] += dotprod * batch_scaling_factor;
} // for row
} // for batch
}
template <typename T>
void PortableMatrixBatchVectorMultiplyAccumulateImpl(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
T* output) {
const int16_t output_max = std::numeric_limits<T>::max();
const int16_t output_min = std::numeric_limits<T>::min();
for (int batch = 0; batch < n_batch; ++batch) {
for (int row = 0; row < n_output; ++row) {
int32_t acc = bias[row];
for (int col = 0; col < n_input; ++col) {
int8_t input_val = input[batch * n_input + col];
int8_t weights_val = input_to_gate_weights[row * n_input + col];
acc += input_val * weights_val;
}
acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
acc += output_zp;
acc += output[batch * n_output + row];
if (acc > output_max) {
acc = output_max;
}
if (acc < output_min) {
acc = output_min;
}
output[batch * n_output + row] = static_cast<T>(acc);
}
}
}
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int16_t* output, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulateImpl(
input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
n_output, output_zp, output);
}
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int8_t* output, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulateImpl(
input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
n_output, output_zp, output);
}
void PortableMatrixBatchVectorMultiply(const int8_t* input,
int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input,
int32_t n_cell, int8_t* gate_output,
int8_t gate_output_zp) {
const int32_t int8_max = std::numeric_limits<int8_t>::max();
const int32_t int8_min = std::numeric_limits<int8_t>::min();
for (int batch = 0; batch < n_batch; ++batch) {
for (int row = 0; row < n_cell; ++row) {
int32_t acc = 0;
for (int col = 0; col < n_input; ++col) {
int32_t input_val = input[batch * n_input + col];
int8_t weights_val = input_to_gate_weights[row * n_input + col];
acc += (input_val - input_zeropoint) * weights_val;
}
acc = MultiplyByQuantizedMultiplier(acc, input_to_gate_effective_scale_a,
input_to_gate_effective_scale_b);
acc += gate_output_zp;
if (acc > int8_max) {
acc = int8_max;
}
if (acc < int8_min) {
acc = int8_min;
}
gate_output[batch * n_cell + row] = static_cast<int8_t>(acc);
}
}
}
void PortableMatrixBatchVectorMultiply(
const int16_t* hidden, const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
int32_t n_output, int32_t output_zp, int8_t* proj_output) {
const int16_t int8_max = std::numeric_limits<int8_t>::max();
const int16_t int8_min = std::numeric_limits<int8_t>::min();
for (int batch = 0; batch < n_batch; ++batch) {
for (int row = 0; row < n_output; ++row) {
int64_t acc = gate_bias[row];
for (int col = 0; col < n_hidden; ++col) {
int16_t input_val = hidden[batch * n_hidden + col];
int8_t weights_val = hidden_to_output_weights[row * n_hidden + col];
int64_t curr = acc;
acc += input_val * weights_val;
if (input_val * weights_val > 0 && acc < curr) {
acc = std::numeric_limits<int32_t>::max();
}
if (input_val * weights_val < 0 && acc > curr) {
acc = std::numeric_limits<int32_t>::min();
}
}
acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a,
proj_effective_scale_b);
acc += output_zp;
if (acc > int8_max) {
acc = int8_max;
}
if (acc < int8_min) {
acc = int8_min;
}
proj_output[batch * n_output + row] = acc;
}
}
}
void PortableApplyLayerNorm(const int16_t* input,
const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output) {
// The square of std::pow(2, 10), which is the extra factor that makes sure
// normalized values has enough resolution.
static const int kTwoToPower20 = 1 << 20;
for (int i = 0; i < n_batch; ++i) {
int64_t sum = 0;
int64_t sum_sq = 0;
for (int j = 0; j < n_input; ++j) {
const int32_t index = i * n_input + j;
int32_t val = static_cast<int32_t>(input[index]);
sum += val;
sum_sq += val * val;
}
int32_t mean =
static_cast<int32_t>(static_cast<int64_t>(sum) * 1024 / n_input);
// TODO(b/173994730): Avoids overflow but only works for POT n_input.
int32_t temp = kTwoToPower20 / n_input;
int64_t variance =
sum_sq * temp - static_cast<int64_t>(mean) * static_cast<int64_t>(mean);
int32_t variance2 = static_cast<int32_t>(variance / kTwoToPower20);
if (variance2 < 1) {
variance2 = variance_limit;
}
int32_t stddev_inverse_a;
int stddev_inverse_b;
GetInvSqrtQuantizedMultiplierExp(variance2, /*reverse_shift*/ -1,
&stddev_inverse_a, &stddev_inverse_b);
for (int j = 0; j < n_input; ++j) {
const int32_t index = i * n_input + j;
int32_t val = static_cast<int32_t>(input[index]);
int32_t shifted = 1024 * val - mean;
int32_t rescaled = MultiplyByQuantizedMultiplier(
shifted, stddev_inverse_a, stddev_inverse_b);
// TODO(jianlijianli): Saturate this.
int64_t val3 = rescaled * layer_norm_weights[j] + bias[j];
int32_t val4 =
static_cast<int32_t>((val3 > 0 ? val3 + 512 : val3 - 512) / 1024);
int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a,
layer_norm_scale_b + 12);
val5 = std::min(std::max(kInt16Min, val5), kInt16Max);
output[index] = static_cast<int16_t>(val5);
}
}
}
void PortableApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b,
const int32_t* bias, int n_batch, int n_input,
int16_t* output) {
const int32_t int16_max = std::numeric_limits<int16_t>::max();
const int32_t int16_min = std::numeric_limits<int16_t>::min();
const float layer_norm_scale =
layer_norm_scale_a *
std::pow(2.0, static_cast<double>(layer_norm_scale_b - 31));
const float bias_scale =
static_cast<float>(std::pow(2.0, -10)) * layer_norm_scale;
for (int batch = 0; batch < n_batch; ++batch) {
float sum = 0.0f;
float sum_sq = 0.0f;
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const float value = static_cast<float>(input[index]);
sum += value;
sum_sq += value * value;
}
const float mean = sum / n_input;
float stddev_inv = 0.0f;
const float variance = sum_sq / n_input - mean * mean;
if (variance == 0) {
stddev_inv = 1.0f / std::sqrt(1e-8f);
} else {
stddev_inv = 1.0f / std::sqrt(variance);
}
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const float normalized_value =
(static_cast<float>(input[index]) - mean) * stddev_inv;
const float weighted_normalized_value =
normalized_value * layer_norm_weights[i] * layer_norm_scale +
bias[i] * bias_scale;
const int32_t quant_output = static_cast<int32_t>(std::round(
weighted_normalized_value * static_cast<float>(std::pow(2, 12))));
output[index] = std::min(int16_max, std::max(int16_min, quant_output));
}
}
}
void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
int32_t scalar, int32_t n_row,
int32_t n_col, int32_t* output) {
for (int i = 0; i < n_row; ++i) {
int32_t row_sum = 0;
for (int j = 0; j < n_col; ++j) {
row_sum += *matrix++;
}
output[i] += row_sum * scalar;
}
}
void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output) {
for (int batch = 0; batch < n_batch; ++batch) {
for (int c = 0; c < n_input; c++) {
using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
const int index = batch * n_input + c;
F3 sigmoid_input = F3::FromRaw(input[index]);
F0 sigmoid_output = gemmlowp::logistic(sigmoid_input);
output[index] = sigmoid_output.raw();
}
}
}
void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output) {
const int32_t int16_max = std::numeric_limits<int16_t>::max();
const int32_t int16_min = std::numeric_limits<int16_t>::min();
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const float float_input =
input[index] * static_cast<float>(std::pow(2, -12));
const float float_output = 1.0f / (1.0f + std::exp(-float_input));
const int32_t quant_output = static_cast<int32_t>(
float_output * static_cast<float>(std::pow(2, 15)));
const int32_t quant_output_clamped =
std::min(int16_max, std::max(int16_min, quant_output));
output[index] = static_cast<int16_t>(quant_output_clamped);
}
}
}
template <int IntegerBits>
void PortableApplyTanhImpl(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output) {
using FX = gemmlowp::FixedPoint<std::int16_t, IntegerBits>;
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
FX tanh_input = FX::FromRaw(input[index]);
F0 tanh_output = gemmlowp::tanh(tanh_input);
output[index] = tanh_output.raw();
}
}
}
void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
int32_t n_batch, int32_t n_input, int16_t* output) {
assert(integer_bits <= 6);
#define DISPATCH_TANH(i) \
case i: \
PortableApplyTanhImpl<i>(input, n_batch, n_input, output); \
break;
switch (integer_bits) {
DISPATCH_TANH(0);
DISPATCH_TANH(1);
DISPATCH_TANH(2);
DISPATCH_TANH(3);
DISPATCH_TANH(4);
DISPATCH_TANH(5);
DISPATCH_TANH(6);
default:
return;
}
#undef DISPATCH_TANH
}
void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int32_t integer_bits,
int16_t* output) {
const int32_t int16_max = std::numeric_limits<int16_t>::max();
const int32_t int16_min = std::numeric_limits<int16_t>::min();
const double two = 2.0;
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const float float_input =
input[index] * std::pow(two, static_cast<double>(integer_bits));
const float float_output = std::tanh(float_input);
const int32_t quant_output = static_cast<int32_t>(
float_output * static_cast<float>(std::pow(2, 15)));
const int32_t quant_output_clamped =
std::min(int16_max, std::max(int16_min, quant_output));
output[index] = static_cast<int16_t>(quant_output_clamped);
}
}
}
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int shift, int16_t* output) {
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const int16_t a = input_1[index];
const int16_t b = input_2[index];
const int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
output[index] =
static_cast<int16_t>(gemmlowp::RoundingDivideByPOT(value, shift));
}
}
}
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output) {
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const int16_t a = input_1[index];
const int16_t b = input_2[index];
int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
value = MultiplyByQuantizedMultiplier(value, multiplier, shift);
value -= output_zp;
value = std::min(std::max(static_cast<int32_t>(-128), value),
static_cast<int32_t>(127));
output[index] = static_cast<int8_t>(value);
}
}
}
void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int16_t* output) {
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
int32_t sum = input_1[index] + input_2[index];
const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum));
output[index] = static_cast<int16_t>(sum_clamped);
}
}
}
float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
int v_size) {
float result = 0.0;
for (int v = 0; v < v_size; v++) {
result += *vector1++ * *vector2++;
}
return result;
}
namespace {
inline int32_t VectorVectorDotProduct(const int16_t* vector1,
const int16_t* vector2, int v_size) {
int32_t result = 0;
for (int v = 0; v < v_size; v++) {
result += *vector1++ * *vector2++;
}
return result;
}
} // namespace
void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
const int16_t* vector2,
int v_size, int n_batch,
int32_t* result) {
for (int b = 0; b < n_batch; b++) {
result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
vector1 += v_size;
vector2 += v_size;
}
}
void PortableVectorBatchVectorCwiseProductAccumulate(
const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
int32_t multiplier, int shift, int16_t* result) {
for (int b = 0; b < n_batch; b++) {
for (int v = 0; v < v_size; v++) {
int32_t prod = vector[v] * *batch_vector++;
prod = MultiplyByQuantizedMultiplier(prod, multiplier, shift);
int32_t output = prod + *result;
output = std::max(std::min(static_cast<int32_t>(32767), output),
static_cast<int32_t>(-32768));
*result++ = output;
}
}
}
void PortableSub1Vector(const float* vector, int v_size, float* result) {
for (int v = 0; v < v_size; v++) {
*result++ = 1.0f - *vector++;
}
}
void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result) {
static const int16_t kOne = 32767;
for (int v = 0; v < v_size; v++) {
*result++ = kOne - *vector++;
}
}
void PortableVectorScalarMultiply(const int8_t* vector, const int v_size,
const float scale, float* result) {
for (int v = 0; v < v_size; ++v) {
*result++ = scale * *vector++;
}
}
void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
float* __restrict__ output_vector,
int v_size, int n_batch) {
for (int batch = 0; batch < n_batch; ++batch) {
float sum = 0.0f;
for (int i = 0; i < v_size; ++i) {
sum += input_vector[i];
}
const float mean = sum / v_size;
float sum_diff_sq = 0.0f;
for (int i = 0; i < v_size; ++i) {
const float diff = input_vector[i] - mean;
sum_diff_sq += diff * diff;
}
const float variance = sum_diff_sq / v_size;
constexpr float kNormalizationConstant = 1e-8f;
const float stddev_inv =
1.0f / std::sqrt(variance + kNormalizationConstant);
for (int i = 0; i < v_size; ++i) {
output_vector[i] = (input_vector[i] - mean) * stddev_inv;
}
input_vector += v_size;
output_vector += v_size;
}
}
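// Illustrative usage (a sketch, not part of the upstream file): normalizing a
// single 4-element batch. The mean is 2.5 and the variance 1.25, so the output
// is approximately {-1.342, -0.447, 0.447, 1.342}.
// ExampleMeanStddevNormalization is a hypothetical helper for illustration.
inline void ExampleMeanStddevNormalization() {
  const float input[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float output[4] = {0.0f, 0.0f, 0.0f, 0.0f};
  PortableMeanStddevNormalization(input, output, /*v_size=*/4, /*n_batch=*/1);
}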
void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b,
int32_t n_batch, int32_t n_cell,
int16_t* output) {
const int32_t int16_max = std::numeric_limits<int16_t>::max();
const int32_t int16_min = std::numeric_limits<int16_t>::min();
for (int i = 0; i < n_batch * n_cell; ++i) {
int32_t x = static_cast<int32_t>(input[i]) - static_cast<int32_t>(input_zp);
int32_t h =
static_cast<int32_t>(recurrent[i]) - static_cast<int32_t>(recurrent_zp);
int32_t x_scaled = MultiplyByQuantizedMultiplier(x, input_effective_scale_a,
input_effective_scale_b);
int32_t h_scaled = MultiplyByQuantizedMultiplier(
h, recurrent_effective_scale_a, recurrent_effective_scale_b);
int32_t y = h_scaled + x_scaled;
if (y > int16_max) {
y = int16_max;
}
if (y < int16_min) {
y = int16_min;
}
output[i] = static_cast<int16_t>(y);
}
}
} // namespace tensor_utils
} // namespace tflite

View File

@@ -0,0 +1,235 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
#include <algorithm>
#include <cstdint>
#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif
namespace tflite {
// Not all backends support CpuBackendContext usage, so forward declare to avoid
// pulling in its implementation.
class CpuBackendContext;
namespace tensor_utils {
template <typename T>
bool PortableIsZeroVector(const T* vector, int v_size) {
for (int i = 0; i < v_size; ++i) {
if (vector[i] != 0) {
return false;
}
}
return true;
}
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min_value,
float* max_value, float* scaling_factor);
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor);
void PortableAsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values,
float* scaling_factor, int32_t* offset);
// Multiply a matrix by a batch vector, and store results in a batch-size
// vector.
void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
int m_rows, int m_cols,
const float* vector,
int n_batch, float* result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result, const float* per_channel_scale,
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
bool* compute_row_sums, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vector, const float* scaling_factors,
int n_batch, int32_t* scratch, float* __restrict__ result,
CpuBackendContext* context);
void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
const int m_cols, const int8_t* __restrict__ vectors,
const float* scaling_factors, int n_batch, float* __restrict__ result);
// Dot product of two vectors.
float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
int v_size);
void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
const int16_t* vector2,
int v_size, int n_batch,
int32_t* result);
void PortableVectorBatchVectorCwiseProductAccumulate(
const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
int32_t multiplier, int shift, int16_t* result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int16_t* output, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int8_t* output, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiply(const int8_t* input,
int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input,
int32_t n_cell, int8_t* gate_output,
int8_t gate_output_zp);
void PortableMatrixBatchVectorMultiply(
const int16_t* hidden, const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
int32_t n_output, int32_t output_zp, int8_t* proj_output);
void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
int32_t scalar, int32_t n_row,
int32_t n_col, int32_t* output);
void PortableApplyLayerNorm(const int16_t* input,
const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output);
void PortableApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b,
const int32_t* bias, int n_batch, int n_input,
int16_t* output);
void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output);
void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output);
void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
int32_t n_batch, int32_t n_input, int16_t* output);
void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int32_t integer_bits,
int16_t* output);
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int shift, int16_t* output);
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output);
void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int16_t* output);
template <typename T>
void PortableCwiseClipping(T* vector, const int v_size,
const T& clipping_value) {
for (int i = 0; i < v_size; i++) {
vector[i] = std::max(std::min(clipping_value, vector[i]),
static_cast<T>(-clipping_value));
}
}
// Batch vector initialization with another vector.
void PortableVectorBatchVectorAssign(const float* vector, int v_size,
int n_batch, float* batch_vector);
// Compute "1.0f - elements of vector" (used in CIFG).
void PortableSub1Vector(const float* vector, int v_size, float* result);
void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result);
// Multiply all elements of the vector by a scalar.
void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
float* result);
// Reduce-sum on a vector:
// input_vector: pointer to input vector.
// output_vector: pointer to output vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
template <typename INPUT, typename OUTPUT>
void PortableReductionSumVector(const INPUT* input_vector,
OUTPUT* output_vector, int output_size,
int reduction_size) {
for (int o = 0; o < output_size; o++) {
OUTPUT result = 0;
for (int r = 0; r < reduction_size; r++) {
result += input_vector[r];
}
output_vector[o] = result;
input_vector += reduction_size;
}
}
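// Illustrative usage (a sketch, not part of the upstream file): summing groups
// of 3 consecutive elements of a 6-element vector into 2 outputs.
// ExampleReductionSumVector is a hypothetical helper added for illustration.
inline void ExampleReductionSumVector() {
  const int32_t input[6] = {1, 2, 3, 4, 5, 6};
  int32_t output[2] = {0, 0};
  PortableReductionSumVector(input, output, /*output_size=*/2,
                             /*reduction_size=*/3);
  // output is now {6, 15}.
}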
// Layer norm for each batch.
void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
float* __restrict__ output_vector,
int v_size, int n_batch);
// Saturating add.
void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b,
int32_t n_batch, int32_t n_cell,
int16_t* output);
} // namespace tensor_utils
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_

View File

@@ -23,6 +23,25 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
// Check if the reduction at index is the first one along the dimensions given
// in axis.
inline bool IsFirstReduction(const int* index, const int num_axis,
const int* axis) {
if (num_axis == 0) {
return true;
}
TFLITE_DCHECK(index != nullptr);
TFLITE_DCHECK(axis != nullptr);
for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
if (index[axis[axis_idx]] != 0) {
return false;
}
}
return true;
}
namespace tflite {
namespace reference_ops {
@@ -35,8 +54,7 @@ inline bool Reduce(const In* input_data, const int* input_dims,
const int* output_dims, const int input_num_dims,
const int output_num_dims, const int* axis,
const int num_axis, int* input_iter,
Out reducer(const Out current, const In in),
Out* output_data) {
Out reducer(Out current, const In in), Out* output_data) {
// Reset input iterator.
for (int idx = 0; idx < input_num_dims; ++idx) {
input_iter[idx] = 0;
@@ -53,6 +71,37 @@ inline bool Reduce(const In* input_data, const int* input_dims,
return true;
}
// Similar to the Reduce function above, but takes two reducer functions:
// 'reducer_first' is called with the first value of the reduction and
// 'reducer_next' is then called for all the others.
template <typename In, typename Out>
inline bool Reduce(const In* input_data, const int* input_dims,
const int* output_dims, const int input_num_dims,
const int output_num_dims, const int* axis,
const int num_axis, int* input_iter,
const std::function<Out(In in)>& reducer_first,
const std::function<Out(Out current, In in)>& reducer_next,
Out* output_data) {
// Reset input iterator.
for (int idx = 0; idx < input_num_dims; ++idx) {
input_iter[idx] = 0;
}
// Iterate through input_data.
do {
size_t input_offset =
ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
input_iter, num_axis, axis);
if (IsFirstReduction(input_iter, num_axis, axis)) {
output_data[output_offset] = reducer_first(input_data[input_offset]);
} else {
output_data[output_offset] =
reducer_next(output_data[output_offset], input_data[input_offset]);
}
} while (NextIndex(input_num_dims, input_dims, input_iter));
return true;
}
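// Illustrative usage (a sketch, not part of the upstream file): reducing a
// 2x3 tensor over axis 1 with the two-reducer overload above. reducer_first
// seeds each output element from the first value it sees, so no separate
// initialization pass over the output is needed.
// ExampleTwoReducerReduce is a hypothetical helper added for illustration.
inline void ExampleTwoReducerReduce() {
  const int32_t input[6] = {1, 2, 3, 4, 5, 6};  // shape {2, 3}
  const int input_dims[2] = {2, 3};
  const int output_dims[1] = {2};
  const int axis[1] = {1};
  int input_iter[2] = {0, 0};
  int64_t output[2] = {0, 0};
  Reduce<int32_t, int64_t>(
      input, input_dims, output_dims, /*input_num_dims=*/2,
      /*output_num_dims=*/1, axis, /*num_axis=*/1, input_iter,
      /*reducer_first=*/[](int32_t in) -> int64_t { return in; },
      /*reducer_next=*/[](int64_t current, int32_t in) -> int64_t {
        return current + in;
      },
      output);
  // output is now {6, 15}.
}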
// This method parses the input 'axis' to remove duplicates and handle negative
// values, and returns a valid 'out_axis'
inline bool ResolveAxis(const int num_dims, const int* axis,
@@ -111,7 +160,8 @@ inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
for (int idx = 0; idx < num_dims; ++idx) {
size_t current = static_cast<size_t>(dims[idx]);
// Overflow prevention.
if (num_elements > std::numeric_limits<size_t>::max() / current) {
if (current > 0 &&
num_elements > std::numeric_limits<size_t>::max() / current) {
return false;
}
num_elements *= current;
@@ -132,17 +182,20 @@ inline bool ReduceGeneric(const T* input_data, const int* input_dims,
bool keep_dims, int* temp_index, int* resolved_axis,
T init_value,
T reducer(const T current, const T in)) {
// Return early when input shape has zero dim.
for (int i = 0; i < input_num_dims; ++i) {
if (input_dims[i] == 0) return true;
}
// Reset output data.
if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
output_data)) {
return false;
}
// Return early when the input shape has a zero dim. This is done after
// initializing the output data, because there are cases where the input
// tensor is empty but the output tensor is not; in that case the output
// tensor must still be filled with init_value.
for (int i = 0; i < input_num_dims; ++i) {
if (input_dims[i] == 0) return true;
}
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
@@ -290,9 +343,9 @@ inline void Mean(const tflite::MeanParams& op_params,
constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
int32_t bias =
output_zero_point -
static_cast<int32_t>(input_zero_point * input_scale / output_scale);
float temp = input_zero_point * input_scale / output_scale;
temp = temp > 0 ? temp + 0.5f : temp - 0.5f;
int32_t bias = output_zero_point - static_cast<int32_t>(temp);
double real_scale =
static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
@@ -353,6 +406,14 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
temp_sum[idx] = U();
}
// Return early when the input shape has a zero dim. This is done after
// initializing the output data, because there are cases where the input
// tensor is empty but the output tensor is not; in that case the output
// tensor must still be filled with init_value.
for (int i = 0; i < input_num_dims; ++i) {
if (input_dims[i] == 0) return true;
}
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
@@ -405,6 +466,57 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
return true;
}
template <typename T>
inline bool QuantizedReduceProd(const T* input_data, int32_t input_zero_point,
const RuntimeShape& input_shape, T* output_data,
int32_t output_zero_point,
const RuntimeShape& output_shape,
const int* axis,
const int64_t num_axis_dimensions,
bool keep_dims, int* temp_index,
int* resolved_axis, int32_t* temp_prod,
int32_t scaling_multiplier, int scaling_shift) {
const int32_t kMinValue = std::numeric_limits<T>::min();
const int32_t kMaxValue = std::numeric_limits<T>::max();
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_shape.DimensionsCount(), axis, num_axis_dimensions,
resolved_axis, &num_resolved_axis)) {
return false;
}
// Calculate the reduced product by rescaling each multiplication step to
// avoid an overflow.
auto reducer_first = [&](T in) -> int32_t { return in - input_zero_point; };
auto reducer_next = [&](int32_t current, T in) -> int32_t {
const int64_t result =
static_cast<int64_t>(current) * (in - input_zero_point);
return MultiplyByQuantizedMultiplier(result, scaling_multiplier,
scaling_shift);
};
if (!Reduce<T, int32_t>(
input_data, input_shape.DimsData(), output_shape.DimsData(),
input_shape.DimensionsCount(), output_shape.DimensionsCount(),
resolved_axis, num_resolved_axis, temp_index, reducer_first,
reducer_next, temp_prod)) {
return false;
}
for (int i = 0; i < output_shape.FlatSize(); i++) {
int32_t result =
MultiplyByQuantizedMultiplier(static_cast<int64_t>(temp_prod[i]),
scaling_multiplier, scaling_shift) +
output_zero_point;
result = std::min(std::max(result, kMinValue), kMaxValue);
output_data[i] = static_cast<T>(result);
}
return true;
}
} // namespace reference_ops
} // namespace tflite

View File

@@ -0,0 +1,228 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void ComputeInterpolationValues(const float value, const float scale,
const bool half_pixel_centers,
int32_t input_size, float* scaled_value,
int32_t* lower_bound,
int32_t* upper_bound) {
if (half_pixel_centers) {
*scaled_value = (value + 0.5f) * scale - 0.5f;
} else {
*scaled_value = value * scale;
}
float scaled_value_floor = std::floor(*scaled_value);
*lower_bound = std::max(static_cast<int32_t>(scaled_value_floor),
static_cast<int32_t>(0));
*upper_bound =
std::min(static_cast<int32_t>(std::ceil(*scaled_value)), input_size - 1);
}
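// Illustrative usage (a sketch, not part of the upstream file): upscaling a
// 4-pixel row to 8 pixels gives scale = 4 / 8 = 0.5. For output x == 3 with
// half-pixel centers, scaled = (3 + 0.5) * 0.5 - 0.5 = 1.25, so the result is
// interpolated between input columns 1 and 2 with weight 0.25 on column 2.
// ExampleComputeInterpolationValues is a hypothetical helper for illustration.
inline void ExampleComputeInterpolationValues() {
  float scaled_value;
  int32_t lower_bound, upper_bound;
  ComputeInterpolationValues(/*value=*/3.0f, /*scale=*/0.5f,
                             /*half_pixel_centers=*/true, /*input_size=*/4,
                             &scaled_value, &lower_bound, &upper_bound);
  // scaled_value == 1.25f, lower_bound == 1, upper_bound == 2.
}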
template <typename T>
inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_size_shape,
const int32_t* output_size_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
// If half_pixel_centers is True, align_corners must be False.
TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners);
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_size_shape =
RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
int32_t input_height = input_shape.Dims(1);
int32_t input_width = input_shape.Dims(2);
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2);
int32_t output_height =
output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
int32_t output_width =
output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];
float height_scale = static_cast<float>(input_height) / output_height;
float width_scale = static_cast<float>(input_width) / output_width;
if (op_params.align_corners && output_height > 1) {
height_scale = static_cast<float>(input_height - 1) / (output_height - 1);
}
if (op_params.align_corners && output_width > 1) {
width_scale = static_cast<float>(input_width - 1) / (output_width - 1);
}
const float rounding_offset = std::numeric_limits<T>::is_integer ? .5f : .0f;
for (int b = 0; b < batches; ++b) {
for (int y = 0; y < output_height; ++y) {
float input_y;
int32_t y0, y1;
ComputeInterpolationValues(y, height_scale, op_params.half_pixel_centers,
input_height, &input_y, &y0, &y1);
for (int x = 0; x < output_width; ++x) {
float input_x;
int32_t x0, x1;
ComputeInterpolationValues(x, width_scale, op_params.half_pixel_centers,
input_width, &input_x, &x0, &x1);
for (int c = 0; c < depth; ++c) {
T interpolation =
static_cast<T>(input_data[Offset(input_shape, b, y0, x0, c)] *
(1 - (input_y - y0)) * (1 - (input_x - x0)) +
input_data[Offset(input_shape, b, y1, x0, c)] *
(input_y - y0) * (1 - (input_x - x0)) +
input_data[Offset(input_shape, b, y0, x1, c)] *
(1 - (input_y - y0)) * (input_x - x0) +
input_data[Offset(input_shape, b, y1, x1, c)] *
(input_y - y0) * (input_x - x0) +
rounding_offset);
output_data[Offset(output_shape, b, y, x, c)] = interpolation;
}
}
}
}
}
inline void ComputeInterpolationValuesInteger(
const int32_t value, const int32_t scale_10, const bool half_pixel_centers,
int32_t input_size, int32_t* scaled_value, int32_t* lower_bound,
int32_t* upper_bound) {
if (half_pixel_centers) {
*scaled_value = value * scale_10 + scale_10 / 2 - (1 << 9);
} else {
*scaled_value = value * scale_10;
}
constexpr int32_t zero = 0;
*lower_bound = std::max(*scaled_value / (1 << 10), zero);
*upper_bound =
std::min((*scaled_value + (1 << 10) - 1) / (1 << 10), input_size - 1);
}
// Same as above, but doesn't use any floating-point arithmetic for the resize.
template <typename T>
inline void ResizeBilinearInteger(
const tflite::ResizeBilinearParams& op_params,
const RuntimeShape& unextended_input_shape, const T* input_data,
const RuntimeShape& unextended_output_size_shape,
const int32_t* output_size_data,
const RuntimeShape& unextended_output_shape, T* output_data) {
// If half_pixel_centers is True, align_corners must be False.
TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners);
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_size_shape =
RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
const int32_t input_height = input_shape.Dims(1);
const int32_t input_width = input_shape.Dims(2);
const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2);
const int32_t output_height =
output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
const int32_t output_width =
output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];
int32_t height_scale_10 =
((1 << 10) * input_height + output_height / 2) / output_height;
int32_t width_scale_10 =
((1 << 10) * input_width + output_width / 2) / output_width;
if (op_params.align_corners && output_height > 1) {
height_scale_10 =
((1 << 10) * (input_height - 1) + (output_height - 1) / 2) /
(output_height - 1);
}
if (op_params.align_corners && output_width > 1) {
width_scale_10 = ((1 << 10) * (input_width - 1) + (output_width - 1) / 2) /
(output_width - 1);
}
for (int b = 0; b < batches; ++b) {
for (int y = 0; y < output_height; ++y) {
int32_t input_y, y0, y1;
ComputeInterpolationValuesInteger(y, height_scale_10,
op_params.half_pixel_centers,
input_height, &input_y, &y0, &y1);
for (int x = 0; x < output_width; ++x) {
int32_t input_x, x0, x1;
ComputeInterpolationValuesInteger(x, width_scale_10,
op_params.half_pixel_centers,
input_width, &input_x, &x0, &x1);
for (int c = 0; c < depth; ++c) {
const int64_t output_20_ll =
static_cast<int64_t>(
input_data[Offset(input_shape, b, y0, x0, c)]) *
((1 << 10) - (input_y - (1 << 10) * y0)) *
((1 << 10) - (input_x - (1 << 10) * x0));
const int64_t output_20_lu =
static_cast<int64_t>(
input_data[Offset(input_shape, b, y1, x0, c)]) *
(input_y - (1 << 10) * y0) *
((1 << 10) - (input_x - (1 << 10) * x0));
const int64_t output_20_rl =
static_cast<int64_t>(
input_data[Offset(input_shape, b, y0, x1, c)]) *
((1 << 10) - (input_y - (1 << 10) * y0)) *
(input_x - (1 << 10) * x0);
const int64_t output_20_ru =
static_cast<int64_t>(
input_data[Offset(input_shape, b, y1, x1, c)]) *
(input_y - (1 << 10) * y0) * (input_x - (1 << 10) * x0);
const int64_t output_20 =
output_20_ll + output_20_lu + output_20_rl + output_20_ru;
const int64_t round = (output_20 > 0) ? (1 << 19) : -(1 << 19);
const T interpolation =
static_cast<T>((output_20 + round) / (1 << 20));
output_data[Offset(output_shape, b, y, x, c)] = interpolation;
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_

View File

@@ -159,7 +159,7 @@ inline int16_t SoftMaxCalculateExp(const SoftmaxParams& params,
std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
static_cast<int32_t>(32767));
// apply the exp() LUT activation function
return generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
return lut_lookup(sat_sym_scaled_diff, params.exp_lut);
}
// Quantized softmax with int16_t input and int16_t output.
inline void SoftmaxInt16(const SoftmaxParams& params,
@@ -207,8 +207,8 @@ inline void SoftmaxInt16(const SoftmaxParams& params,
std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
static_cast<int32_t>(32767)));
// apply 1/(1 + x) LUT activation function
int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
int16_t reciprocal_scale_Q015 =
lut_lookup(sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
// Rescale the exp_result with reciprocal
// range of output is [0, 32767] correspond to [0.0, 1.0]

View File

@@ -0,0 +1,80 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
#include <cstdint>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int input_depth = input_shape.Dims(3);
const int input_width = input_shape.Dims(2);
const int input_height = input_shape.Dims(1);
const int input_batch = input_shape.Dims(0);
const int output_depth = output_shape.Dims(3);
const int output_width = output_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_batch = output_shape.Dims(0);
const int32_t block_size = op_params.block_size;
TFLITE_DCHECK_EQ(input_width, output_width * block_size);
TFLITE_DCHECK_EQ(input_height, output_height * block_size);
TFLITE_DCHECK_EQ(input_depth * block_size * block_size, output_depth);
TFLITE_DCHECK_EQ(input_batch, output_batch);
for (int in_b = 0; in_b < input_batch; ++in_b) {
for (int in_h = 0; in_h < input_height; ++in_h) {
for (int in_w = 0; in_w < input_width; ++in_w) {
for (int in_d = 0; in_d < input_depth; ++in_d) {
const int out_d =
in_d + ((in_h % block_size) * block_size + in_w % block_size) *
input_depth;
const int out_w = in_w / block_size;
const int out_h = in_h / block_size;
const int out_b = in_b;
const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
const int output_index =
Offset(output_shape, out_b, out_h, out_w, out_d);
output_data[output_index] = input_data[input_index];
}
}
}
}
}
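// Illustrative usage (a sketch, not part of the upstream file): a 1x4x4x1
// input with block_size 2 becomes a 1x2x2x4 output; each 2x2 spatial block is
// moved into the four channels of one output pixel in row-major order.
// ExampleSpaceToDepth is a hypothetical helper added only for illustration.
inline void ExampleSpaceToDepth() {
  // NHWC input, shape {1, 4, 4, 1}: pixel value == 4 * row + column.
  const int8_t input[16] = {0, 1, 2,  3,  4,  5,  6,  7,
                            8, 9, 10, 11, 12, 13, 14, 15};
  int8_t output[16] = {};
  tflite::SpaceToDepthParams op_params;
  op_params.block_size = 2;
  SpaceToDepth(op_params, RuntimeShape({1, 4, 4, 1}), input,
               RuntimeShape({1, 2, 2, 4}), output);
  // output == {0, 1, 4, 5,  2, 3, 6, 7,  8, 9, 12, 13,  10, 11, 14, 15}.
}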
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_

View File

@@ -0,0 +1,111 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T, int N>
void TransposeImpl(const TransposeParams& params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
const int unextended_input_size = unextended_input_shape.DimensionsCount();
const int unextended_output_size = unextended_output_shape.DimensionsCount();
TFLITE_DCHECK_LE(unextended_input_size, N);
TFLITE_DCHECK_LE(unextended_output_size, N);
TFLITE_DCHECK_EQ(unextended_output_size, params.perm_count);
const int input_ext_size = N - unextended_input_size;
const int output_ext_size = N - unextended_output_size;
NdArrayDesc<N> input_desc;
NdArrayDesc<N> output_desc;
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape),
&input_desc);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
// The perm data is extended to match the output, each index incremented by
// the amount of front padding of the input shape.
int extended_perm[N];
for (int i = 0; i < N; ++i) {
extended_perm[i] = i < output_ext_size
? i
: params.perm[i - output_ext_size] + input_ext_size;
}
// Permutes the input shape so we don't need to permute the indexes inside
// the loop. Check to make sure output_dims matches the permuted input dims.
NdArrayDesc<N> perm_input_desc;
for (int k = 0; k < N; ++k) {
TFLITE_DCHECK_EQ(input_desc.extents[extended_perm[k]],
output_desc.extents[k]);
perm_input_desc.extents[k] = input_desc.extents[extended_perm[k]];
perm_input_desc.strides[k] = input_desc.strides[extended_perm[k]];
}
// Naive transpose loop (iterate on output index and compute input index).
auto tranpose_func = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
input_data[SubscriptToIndex(perm_input_desc, indexes)];
};
NDOpsHelper<N>(output_desc, tranpose_func);
}
template <typename T, int N = 5>
void Transpose(const TransposeParams& params,
const RuntimeShape& unextended_input_shape, const T* input_data,
const RuntimeShape& unextended_output_shape, T* output_data) {
// The transpose kernel only rearranges values and performs no numeric
// evaluation on each cell, so it is safe to implement it per size of scalar
// type; this trick keeps the total code size in a reasonable range.
switch (sizeof(T)) {
case 1:
TransposeImpl<int8_t, N>(params, unextended_input_shape,
reinterpret_cast<const int8_t*>(input_data),
unextended_output_shape,
reinterpret_cast<int8_t*>(output_data));
break;
case 2:
TransposeImpl<int16_t, N>(params, unextended_input_shape,
reinterpret_cast<const int16_t*>(input_data),
unextended_output_shape,
reinterpret_cast<int16_t*>(output_data));
break;
case 4:
TransposeImpl<int32_t, N>(params, unextended_input_shape,
reinterpret_cast<const int32_t*>(input_data),
unextended_output_shape,
reinterpret_cast<int32_t*>(output_data));
break;
case 8:
TransposeImpl<int64_t, N>(params, unextended_input_shape,
reinterpret_cast<const int64_t*>(input_data),
unextended_output_shape,
reinterpret_cast<int64_t*>(output_data));
break;
}
}
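// Illustrative usage (a sketch, not part of the upstream file): transposing a
// 2x3 matrix with perm = {1, 0} yields a 3x2 matrix. Low-rank shapes are
// extended internally, so a plain 2-D transpose works unchanged.
// ExampleTranspose2D is a hypothetical helper added only for illustration.
inline void ExampleTranspose2D() {
  const int32_t input[6] = {1, 2, 3, 4, 5, 6};  // shape {2, 3}, row-major
  int32_t output[6] = {};
  TransposeParams params;
  params.perm_count = 2;
  params.perm[0] = 1;
  params.perm[1] = 0;
  Transpose(params, RuntimeShape({2, 3}), input, RuntimeShape({3, 2}), output);
  // output is now {1, 4, 2, 5, 3, 6}, i.e. the columns of the input.
}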
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_

View File

@@ -400,13 +400,22 @@ inline size_t ReducedOutputOffset(const int num_dims, const int* dims,
return offset;
}
// Since tensors with '0' in their shape are valid in TF, these offset functions
// allow that as long as the corresponding index is also 0. It is up to the
// calling ops to ensure that they perform verification checks on tensor shapes
// if they don't support a particular behavior.
inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) {
TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4);
const int* dims_data = reinterpret_cast<const int*>(shape.DimsDataUpTo5D());
TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]);
TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]);
TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]);
TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]);
TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) ||
(i0 >= 0 && i0 < dims_data[0]));
TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) ||
(i1 >= 0 && i1 < dims_data[1]));
TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) ||
(i2 >= 0 && i2 < dims_data[2]));
TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) ||
(i3 >= 0 && i3 < dims_data[3]));
return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
}
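// Worked example (illustration, not part of the upstream file): for an NHWC
// shape {2, 8, 8, 3}, Offset(shape, 1, 2, 3, 1) =
// ((1 * 8 + 2) * 8 + 3) * 3 + 1 = 250, the flat index of batch 1, row 2,
// column 3, channel 1.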
@@ -414,21 +423,34 @@ inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3,
int i4) {
TFLITE_DCHECK_EQ(shape.DimensionsCount(), 5);
const int* dims_data = reinterpret_cast<const int*>(shape.DimsDataUpTo5D());
TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]);
TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]);
TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]);
TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]);
TFLITE_DCHECK(i4 >= 0 && i4 < dims_data[4]);
TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) ||
(i0 >= 0 && i0 < dims_data[0]));
TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) ||
(i1 >= 0 && i1 < dims_data[1]));
TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) ||
(i2 >= 0 && i2 < dims_data[2]));
TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) ||
(i3 >= 0 && i3 < dims_data[3]));
TFLITE_DCHECK((dims_data[4] == 0 && i4 == 0) ||
(i4 >= 0 && i4 < dims_data[4]));
return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) *
dims_data[4] +
i4;
}
inline int Offset(const RuntimeShape& shape, int* index) {
return Offset(shape, index[0], index[1], index[2], index[3]);
}
inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) {
TFLITE_DCHECK(i0 >= 0 && i0 < dims.sizes[0]);
TFLITE_DCHECK(i1 >= 0 && i1 < dims.sizes[1]);
TFLITE_DCHECK(i2 >= 0 && i2 < dims.sizes[2]);
TFLITE_DCHECK(i3 >= 0 && i3 < dims.sizes[3]);
TFLITE_DCHECK((i0 == 0 && dims.sizes[0] == 0) ||
(i0 >= 0 && i0 < dims.sizes[0]));
TFLITE_DCHECK((i1 == 0 && dims.sizes[1] == 0) ||
(i1 >= 0 && i1 < dims.sizes[1]));
TFLITE_DCHECK((i2 == 0 && dims.sizes[2] == 0) ||
(i2 >= 0 && i2 < dims.sizes[2]));
TFLITE_DCHECK((i3 == 0 && dims.sizes[3] == 0) ||
(i3 >= 0 && i3 < dims.sizes[3]));
return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] +
i3 * dims.strides[3];
}
@@ -437,10 +459,6 @@ inline int Offset(const Dims<4>& dims, int* index) {
return Offset(dims, index[0], index[1], index[2], index[3]);
}
inline int Offset(const RuntimeShape& shape, int* index) {
return Offset(shape, index[0], index[1], index[2], index[3]);
}
// Get array size, DCHECKing that the dim index is in range.
//
// Note that this will be phased out with Dims<4>, since RuntimeShape::Dims()
@@ -602,6 +620,58 @@ inline int MatchingFlatSize(const Dims<N>& dims, const Dims<N>& check_dims_0,
return MatchingFlatSize(dims, check_dims_1, check_dims_2, check_dims_3);
}
// Flat size calculation, checking if their extended shapes match.
inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0) {
const int shape_dims = shape.DimensionsCount();
const int check_shape_0_dims = check_shape_0.DimensionsCount();
const int min_dims = std::min(shape_dims, check_shape_0_dims);
for (int i = 0; i < min_dims; ++i) {
TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i),
check_shape_0.Dims(check_shape_0_dims - 1 - i));
}
for (int i = min_dims; i < shape_dims; ++i) {
TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i), 1);
}
for (int i = min_dims; i < check_shape_0_dims; ++i) {
TFLITE_DCHECK_EQ(check_shape_0.Dims(check_shape_0_dims - 1 - i), 1);
}
return shape.FlatSize();
}
inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1) {
const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1),
flat_size);
return flat_size;
}
inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1,
const RuntimeShape& check_shape_2) {
const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
TFLITE_DCHECK_EQ(
MatchingExtendedShapeFlatSize(shape, check_shape_1, check_shape_2),
flat_size);
return flat_size;
}
inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1,
const RuntimeShape& check_shape_2,
const RuntimeShape& check_shape_3) {
const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1,
check_shape_2, check_shape_3),
flat_size);
return flat_size;
}
// Data is required to be contiguous, and so many operators can use either the
// full array flat size or the flat size with one dimension skipped (commonly
// the depth).
@@ -885,6 +955,8 @@ struct Conv3DParams {
float float_activation_max;
};
typedef Conv3DParams Conv3DTransposeParams;
struct DepthToSpaceParams {
int32_t block_size;
};
@@ -1019,9 +1091,9 @@ struct PackParams {
struct PadParams {
int8_t left_padding_count;
int32_t left_padding[4];
int32_t left_padding[5];
int8_t right_padding_count;
int32_t right_padding[4];
int32_t right_padding[5];
ResizingCategory resizing_category;
};
@@ -1196,6 +1268,23 @@ inline void GetActivationParams(const P& params, int64_t* min, int64_t* max) {
*min = params.int64_activation_min;
*max = params.int64_activation_max;
}
// Type trait to check whether a given type has a size smaller than 4 bytes.
template <typename T>
struct is_small_integer
: public std::integral_constant<bool,
std::is_same<T, int8_t>::value ||
std::is_same<T, uint8_t>::value ||
std::is_same<T, int16_t>::value ||
std::is_same<T, uint16_t>::value> {};
// Type trait to check whether a given type is int32 or int64.
template <typename T>
struct is_int32_or_int64
: public std::integral_constant<bool, std::is_same<T, int32_t>::value ||
std::is_same<T, int64_t>::value> {
};
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_

View File

@@ -119,6 +119,7 @@ TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node,
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
int index) {
TfLiteTensor* tensor = GetMutableInput(context, node, index);
if (tensor == nullptr) return nullptr;
return tensor->is_variable ? tensor : nullptr;
}
@@ -197,7 +198,7 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int* per_channel_shift) {
int32_t* per_channel_multiplier, int32_t* per_channel_shift) {
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
return PopulateConvolutionQuantizationParams(
@@ -212,7 +213,8 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels) {
int32_t* per_channel_multiplier, int32_t* per_channel_shift,
int num_channels) {
TF_LITE_ENSURE_EQ(context, input->quantization.type,
kTfLiteAffineQuantization);
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
@@ -333,30 +335,49 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
}
namespace {
void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
int32_t qmin, int32_t qmax,
TfLiteTensor* output,
int32_t* act_min, int32_t* act_max) {
inline TfLiteStatus Quantize(TfLiteContext* context, float scale,
int32_t zero_point, float f, int32_t& q) {
const float tmp = TfLiteRound(f / scale);
const bool no_integer_overflow_from_quantization =
(tmp >= static_cast<float>(std::numeric_limits<int32_t>::min()) &&
tmp <= static_cast<float>(std::numeric_limits<int32_t>::max()));
TF_LITE_ENSURE(context, no_integer_overflow_from_quantization);
q = zero_point + static_cast<int32_t>(tmp);
return kTfLiteOk;
}
TfLiteStatus CalculateActivationRangeQuantizedImpl(
TfLiteContext* context, TfLiteFusedActivation activation, int32_t qmin,
int32_t qmax, TfLiteTensor* output, int32_t* act_min, int32_t* act_max) {
const auto scale = output->params.scale;
const auto zero_point = output->params.zero_point;
auto quantize = [scale, zero_point](float f) {
return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
};
int32_t tmp_q;
if (activation == kTfLiteActRelu) {
*act_min = std::max(qmin, quantize(0.0));
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 0.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
*act_max = qmax;
} else if (activation == kTfLiteActRelu6) {
*act_min = std::max(qmin, quantize(0.0));
*act_max = std::min(qmax, quantize(6.0));
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 0.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 6.0, tmp_q));
*act_max = std::min(qmax, tmp_q);
} else if (activation == kTfLiteActReluN1To1) {
*act_min = std::max(qmin, quantize(-1.0));
*act_max = std::min(qmax, quantize(1.0));
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, -1.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 1.0, tmp_q));
*act_max = std::min(qmax, tmp_q);
} else {
*act_min = qmin;
*act_max = qmax;
}
return kTfLiteOk;
}
} // namespace
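// Worked example (illustration, not part of the upstream file): with an output
// scale of 0.1 and a zero point of -128, kTfLiteActRelu6 clamps the quantized
// activation range to [Quantize(0.0), Quantize(6.0)] =
// [-128 + round(0 / 0.1), -128 + round(6 / 0.1)] = [-128, -68], further
// intersected with [qmin, qmax].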
@@ -380,9 +401,8 @@ TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
TF_LITE_ENSURE(context, false);
}
CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
act_max);
return kTfLiteOk;
return CalculateActivationRangeQuantizedImpl(context, activation, qmin, qmax,
output, act_min, act_max);
}
bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
@@ -412,18 +432,15 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteIntArray** output_shape) {
int dims1 = NumDimensions(input1);
int dims2 = NumDimensions(input2);
int out_dims = std::max(dims1, dims2);
if (NumElements(input1) == 0) {
*output_shape = TfLiteIntArrayCopy(input1->dims);
return kTfLiteOk;
}
const int dims1 = NumDimensions(input1);
const int dims2 = NumDimensions(input2);
const int out_dims = std::max(dims1, dims2);
std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
for (int i = 0; i < out_dims; ++i) {
int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
if (!(d1 == d2 || d1 == 1 || d2 == 1)) {
context->ReportError(context,
"Given shapes, %s and %s, are not broadcastable.",
@@ -431,8 +448,13 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
GetShapeDebugString(input2->dims).c_str());
return kTfLiteError;
}
if (d1 == 0 || d2 == 0) {
shape->data[out_dims - i - 1] = 0;
} else {
shape->data[out_dims - i - 1] = std::max(d1, d2);
}
}
*output_shape = shape.release();
return kTfLiteOk;
}
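// Worked example (illustration, not part of the upstream file): broadcasting
// shapes {2, 1, 3} and {1, 4, 3} yields {2, 4, 3}. Broadcasting {2, 0, 3} and
// {2, 1, 3} now yields {2, 0, 3}, because a zero-sized dimension propagates to
// the output instead of being widened to the maximum of the two sizes.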
@@ -442,17 +464,20 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
const TfLiteTensor* input2,
const TfLiteTensor* input3,
TfLiteIntArray** output_shape) {
int dims1 = NumDimensions(input1);
int dims2 = NumDimensions(input2);
int dims3 = NumDimensions(input3);
int out_dims = std::max(std::max(dims1, dims2), dims3);
const int dims1 = NumDimensions(input1);
const int dims2 = NumDimensions(input2);
const int dims3 = NumDimensions(input3);
const int out_dims = std::max(std::max(dims1, dims2), dims3);
std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
for (int i = 0; i < out_dims; ++i) {
int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
const int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
const int min_value = std::min(std::min(d1, d2), d3);
int max_value = std::max(std::max(d1, d2), d3);
// If one dimension is 0, the others must be 0 or 1.
if (min_value == 0) max_value = 0;
if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) ||
!(d3 == 1 || d3 == max_value)) {
context->ReportError(
@@ -473,42 +498,42 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
int TfLiteTypeGetSize(TfLiteType type) {
switch (type) {
case kTfLiteUInt8:
TF_LITE_ASSERT_EQ(sizeof(uint8_t), 1);
static_assert(sizeof(uint8_t) == 1, "");
return 1;
case kTfLiteInt8:
TF_LITE_ASSERT_EQ(sizeof(int8_t), 1);
static_assert(sizeof(int8_t) == 1, "");
return 1;
case kTfLiteBool:
return sizeof(bool);
case kTfLiteInt16:
TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
static_assert(sizeof(int16_t) == 2, "");
return 2;
case kTfLiteFloat16:
TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
static_assert(sizeof(int16_t) == 2, "");
return 2;
case kTfLiteFloat32:
TF_LITE_ASSERT_EQ(sizeof(float), 4);
static_assert(sizeof(float) == 4, "");
return 4;
case kTfLiteInt32:
TF_LITE_ASSERT_EQ(sizeof(int32_t), 4);
static_assert(sizeof(int32_t) == 4, "");
return 4;
case kTfLiteUInt32:
TF_LITE_ASSERT_EQ(sizeof(uint32_t), 4);
static_assert(sizeof(uint32_t) == 4, "");
return 4;
case kTfLiteInt64:
TF_LITE_ASSERT_EQ(sizeof(int64_t), 8);
static_assert(sizeof(int64_t) == 8, "");
return 8;
case kTfLiteUInt64:
TF_LITE_ASSERT_EQ(sizeof(uint64_t), 8);
static_assert(sizeof(uint64_t) == 8, "");
return 8;
case kTfLiteFloat64:
TF_LITE_ASSERT_EQ(sizeof(double), 8);
static_assert(sizeof(double) == 8, "");
return 8;
case kTfLiteComplex64:
TF_LITE_ASSERT_EQ(sizeof(std::complex<float>), 8);
static_assert(sizeof(std::complex<float>) == 8, "");
return 8;
case kTfLiteComplex128:
TF_LITE_ASSERT_EQ(sizeof(std::complex<double>), 16);
static_assert(sizeof(std::complex<double>) == 16, "");
return 16;
default:
return 0;

View File

@@ -214,14 +214,15 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int* per_channel_shift);
int32_t* per_channel_multiplier, int32_t* per_channel_shift);
TfLiteStatus PopulateConvolutionQuantizationParams(
TfLiteContext* context, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels);
int32_t* per_channel_multiplier, int32_t* per_channel_shift,
int num_channels);
// Calculates the multiplication factor for a quantized convolution (or
// quantized depthwise convolution) involving the given tensors. Returns an

View File

@@ -15,69 +15,24 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
#define TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
// If we're on a platform without standard IO functions, fall back to a
// non-portable function.
#ifdef TF_LITE_MCU_DEBUG_LOG
#include "tensorflow/lite/micro/debug_log.h"
#define DEBUG_LOG(x) \
do { \
DebugLog(x); \
} while (0)
inline void InfiniteLoop() {
DEBUG_LOG("HALTED\n");
#if !defined(TF_LITE_MCU_DEBUG_LOG)
#include <cstdlib>
#define TFLITE_ABORT abort()
#else
inline void AbortImpl() {
DebugLog("HALTED\n");
while (1) {
}
}
#define TFLITE_ABORT AbortImpl();
#endif
#define TFLITE_ABORT InfiniteLoop();
#else // TF_LITE_MCU_DEBUG_LOG
#include <cstdio>
#include <cstdlib>
#define DEBUG_LOG(x) \
do { \
fprintf(stderr, "%s", (x)); \
} while (0)
// Reports an error for a type unsupported by op 'op_name' and returns kTfLiteError.
#define TF_LITE_UNSUPPORTED_TYPE(context, type, op_name) \
do { \
TF_LITE_KERNEL_LOG((context), "%s:%d Type %s is unsupported by op %s.", \
__FILE__, __LINE__, TfLiteTypeGetName(type), \
(op_name)); \
return kTfLiteError; \
} while (0)
#define TFLITE_ABORT abort()
#endif // TF_LITE_MCU_DEBUG_LOG
#if defined(NDEBUG) || defined(ARDUINO)
#if defined(NDEBUG)
#define TFLITE_ASSERT_FALSE (static_cast<void>(0))
#else
#define TFLITE_ASSERT_FALSE TFLITE_ABORT
#endif
#define TF_LITE_FATAL(msg) \
do { \
DEBUG_LOG(msg); \
DEBUG_LOG("\nFATAL\n"); \
TFLITE_ABORT; \
} while (0)
#define TF_LITE_ASSERT(x) \
do { \
if (!(x)) TF_LITE_FATAL(#x); \
} while (0)
#define TF_LITE_ASSERT_EQ(x, y) \
do { \
if ((x) != (y)) TF_LITE_FATAL(#x " didn't equal " #y); \
} while (0)
#endif // TENSORFLOW_LITE_KERNELS_OP_MACROS_H_

View File

@@ -20,7 +20,6 @@ limitations under the License.
namespace tflite {
// TODO(renjieliu): Migrate others to use ComputePaddingWithLeftover.
inline int ComputePadding(int stride, int dilation_rate, int in_size,
int filter_size, int out_size) {
int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
@@ -45,6 +44,11 @@ inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size,
inline int ComputeOutSize(TfLitePadding padding, int image_size,
int filter_size, int stride, int dilation_rate = 1) {
int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
// TODO(b/186448822): This uses 0 since the function has no other way to
// report an error case.
if (stride == 0) return 0;
switch (padding) {
case kTfLitePaddingSame:
return (image_size + stride - 1) / stride;

View File

@@ -32,14 +32,18 @@ AllOpsResolver::AllOpsResolver() {
AddConcatenation();
AddConv2D();
AddCos();
AddCumSum();
AddDepthToSpace();
AddDepthwiseConv2D();
AddDequantize();
AddDetectionPostprocess();
AddDiv();
AddElu();
AddEqual();
AddEthosU();
AddExpandDims();
AddFloor();
AddFloorDiv();
AddFloorMod();
AddFullyConnected();
AddGreater();
AddGreaterEqual();
@@ -70,6 +74,7 @@ AllOpsResolver::AllOpsResolver() {
AddRelu();
AddRelu6();
AddReshape();
AddResizeBilinear();
AddResizeNearestNeighbor();
AddRound();
AddRsqrt();
@@ -77,6 +82,7 @@ AllOpsResolver::AllOpsResolver() {
AddSin();
AddSoftmax();
AddSpaceToBatchNd();
AddSpaceToDepth();
AddSplit();
AddSplitV();
AddSqrt();
@@ -87,6 +93,7 @@ AllOpsResolver::AllOpsResolver() {
AddSvdf();
AddTanh();
AddTransposeConv();
AddTranspose();
AddUnpack();
}
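// Illustrative usage (a sketch, not part of this file): AllOpsResolver
// registers every built-in kernel, which is convenient for prototyping at the
// cost of flash size. g_model_data and kTensorArenaSize are placeholder names,
// and the MicroInterpreter constructor arguments may differ slightly between
// tfmicro versions.
//
//   #include "tensorflow/lite/micro/all_ops_resolver.h"
//   #include "tensorflow/lite/micro/micro_error_reporter.h"
//   #include "tensorflow/lite/micro/micro_interpreter.h"
//
//   static tflite::MicroErrorReporter error_reporter;
//   static uint8_t tensor_arena[kTensorArenaSize];
//
//   const tflite::Model* model = tflite::GetModel(g_model_data);
//   tflite::AllOpsResolver resolver;
//   tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
//                                        kTensorArenaSize, &error_reporter);
//   interpreter.AllocateTensors();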

View File

@@ -0,0 +1,64 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/flatbuffer_utils.h"
namespace tflite {
FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size)
: flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {}
int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadInt64(elem, byte_width_);
}
uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadUInt64(elem, byte_width_);
}
int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const {
return static_cast<int32_t>(ElementAsInt64(i));
}
bool FlexbufferWrapper::ElementAsBool(size_t i) const {
return static_cast<bool>(ElementAsUInt64(i));
}
double FlexbufferWrapper::ElementAsDouble(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadDouble(elem, byte_width_);
}
float FlexbufferWrapper::ElementAsFloat(size_t i) const {
return static_cast<float>(FlexbufferWrapper::ElementAsDouble(i));
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const SubGraph* subgraph) {
if (subgraph->operators() != nullptr) {
return subgraph->operators()->size();
} else {
return 0;
}
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
return NumSubgraphOperators(subgraph);
}
} // namespace tflite

View File

@@ -0,0 +1,56 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
// with the parameter names as map keys and the parameter values as the
// corresponding map values.
// Accessing the map values using the flexbuffers::Map class is inline-heavy,
// which can cause the code size to bloat beyond what's reasonable for a micro
// application. Use this class instead when possible.
// FlexbufferWrapper takes advantage of the following properties of
// flexbuffers::Map:
// 1. It can be viewed as a flexbuffers::Vector of the values.
// 2. The values in the vector are ordered alphabetically by their keys.
// 3. All integer and Boolean values are stored as 64-bit numbers.
// 4. All floating point values are stored as double precision numbers.
// The properties are mentioned in the flexbuffers docs, but we rely on
// a unit test to catch design changes.
class FlexbufferWrapper : public flexbuffers::Vector {
public:
// Construct with a serialized flexbuffer 'buffer' of 'size' bytes
explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
int64_t ElementAsInt64(size_t i) const;
uint64_t ElementAsUInt64(size_t i) const;
int32_t ElementAsInt32(size_t i) const;
bool ElementAsBool(size_t i) const;
double ElementAsDouble(size_t i) const;
float ElementAsFloat(size_t i) const;
};
// Return the number of operators in a tflite subgraph
uint32_t NumSubgraphOperators(const SubGraph* subgraph);
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);
} // namespace tflite
#endif // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
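A minimal usage sketch of the wrapper, assuming a hypothetical custom op whose init data was packed as a flexbuffer map with keys "alpha" (a float) and "count" (an int); alphabetical key ordering puts "alpha" at index 0 and "count" at index 1. The struct, parameter names, and indices below are illustrative assumptions, not part of this header:
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/micro/flatbuffer_utils.h"
// Hypothetical parameter layout: map keys are ordered alphabetically by name.
constexpr int kAlphaIndex = 0;  // 'alpha'
constexpr int kCountIndex = 1;  // 'count'
struct MyOpParams {
  float alpha;
  int32_t count;
};
MyOpParams ParseMyOpParams(const uint8_t* init_data, size_t length) {
  tflite::FlexbufferWrapper wrapper(init_data, length);
  MyOpParams params;
  params.alpha = wrapper.ElementAsFloat(kAlphaIndex);
  params.count = wrapper.ElementAsInt32(kCountIndex);
  return params;
}
The circular_buffer kernel later in this diff reads its single 'cycles_max' parameter at index 0 in exactly this way.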

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
@@ -25,141 +27,21 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
uint8_t six_uint8;
uint8_t zero_uint8;
};
} // namespace
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
template <typename T>
inline void ReluQuantized(const ReluOpData& data,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const T* input_data,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<T>(clamped);
}
}
template <typename T>
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
template <typename Q>
inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
const Q* input_data,
const RuntimeShape& output_shape, Q* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
} else if (input->type == kTfLiteUInt8) {
CalculateReluOpData<uint8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -171,19 +53,12 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
@@ -197,34 +72,14 @@ void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
} else if (input->type == kTfLiteUInt8) {
data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_uint8 = input->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -236,21 +91,13 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8: {
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
Relu6Quantized(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
@@ -259,13 +106,13 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
}
}
} // namespace activations
} // namespace
TfLiteRegistration Register_RELU() {
return {/*init=*/activations::ReluInit,
return {/*init=*/ReluInit,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*prepare=*/ReluPrepare,
/*invoke=*/ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -273,16 +120,14 @@ TfLiteRegistration Register_RELU() {
}
TfLiteRegistration Register_RELU6() {
return {/*init=*/activations::Relu6Init,
return {/*init=*/Relu6Init,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*prepare=*/Relu6Prepare,
/*invoke=*/Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,63 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
extern const int kActivationsInputTensor;
extern const int kActivationsOutputTensor;
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
};
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data);
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data);
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data);
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_

View File

@@ -0,0 +1,148 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kActivationsInputTensor = 0;
const int kActivationsOutputTensor = 0;
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<int8_t>(clamped);
}
}
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int8_t val = input_data[i];
const int8_t clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kActivationsOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
}
return kTfLiteOk;
}
} // namespace tflite
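To make the clamp bounds computed by CalculateReluOpData concrete, a numeric sketch with assumed (not real-model) int8 output quantization, scale 0.5 and zero_point -10: act_min = 0 maps to -10 + round(0 / 0.5) = -10, and the unbounded act_max falls back to the int8 maximum, so ReluQuantized clamps to [-10, 127], pinning every value below the real 0 to the output zero point.
// Illustrative check of that arithmetic; not part of the kernel.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
int main() {
  const float output_scale = 0.5f;        // assumed
  const int32_t output_zero_point = -10;  // assumed
  const float act_min = 0.0f;             // ReLU lower bound
  const int32_t q_min = std::max<int32_t>(
      std::numeric_limits<int8_t>::min(),
      output_zero_point +
          static_cast<int32_t>(std::round(act_min / output_scale)));
  const int32_t q_max = std::numeric_limits<int8_t>::max();  // act_max is +inf
  return (q_min == -10 && q_max == 127) ? 0 : 1;  // exit 0 when the sketch holds
}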

View File

@@ -66,12 +66,12 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
const double twice_max_input_scale =
2 * static_cast<double>(
std::max(input1->params.scale, input2->params.scale));
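The left shift and twice_max_input_scale above feed input and output multipliers (the full derivation follows the same pattern shown later in this diff for ADD_N and CUMSUM), chosen so the fixed-point pipeline reproduces the real-valued sum s1*(q1 - z1) + s2*(q2 - z2) at the output scale. A small double-precision sketch of that identity, with made-up scales:
// Illustrative only, not kernel code: the three multipliers compose back to
//   (q_out - z_out) ~= (s1*(q1 - z1) + s2*(q2 - z2)) / s_out.
#include <algorithm>
#include <cmath>
int main() {
  const double s1 = 0.02, s2 = 0.05, s_out = 0.1;  // assumed scales
  const int left_shift = 20;
  const double twice_max = 2.0 * std::max(s1, s2);
  const double in1_mult = s1 / twice_max;  // applied after << left_shift
  const double in2_mult = s2 / twice_max;
  const double out_mult = twice_max / ((1 << left_shift) * s_out);
  // One offset-corrected sample pair: (q1 - z1) = 7, (q2 - z2) = -3.
  const double scaled_sum = (7.0 * (1 << left_shift)) * in1_mult +
                            (-3.0 * (1 << left_shift)) * in2_mult;
  const double q_out = scaled_sum * out_mult;
  const double expected = (s1 * 7 + s2 * -3) / s_out;
  return std::abs(q_out - expected) < 1e-6 ? 0 : 1;  // exit 0: identity holds
}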
@@ -133,7 +133,6 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
@@ -145,12 +144,14 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
@@ -168,24 +169,32 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
tflite::micro::GetTensorData<int16_t>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
tflite::micro::GetTensorData<int16_t>(output),
false);
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
@@ -231,7 +240,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
@@ -28,6 +29,22 @@ namespace {
constexpr int kInputTensor0 = 0;
constexpr int kOutputTensor = 0;
constexpr int kAddNIntegerShift = 20;
// only used with INT8 tensors
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t input_offset;
int32_t output_offset;
int32_t input_multiplier;
int32_t output_multiplier;
int input_shift;
int output_shift;
int left_shift;
int scratch_index;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
int num_inputs = NumInputs(node);
TF_LITE_ENSURE(context, num_inputs >= 2);
@@ -47,19 +64,61 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, i, &input));
TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input));
TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type);
// Check that all INT8 input tensors have the same zero-point and scale.
if (input_tensor_first->type == kTfLiteInt8) {
TF_LITE_ENSURE(context, input_tensor_first->params.zero_point ==
input->params.zero_point);
TF_LITE_ENSURE(context,
input_tensor_first->params.scale == input->params.scale);
}
}
if (output->type == kTfLiteFloat32) {
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in the node's user_data
if (output->type == kTfLiteFloat32) {
int scratch_index;
size_t scratch_size = sizeof(float*) * num_inputs;
TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
context, scratch_size, &scratch_index));
node->user_data =
reinterpret_cast<decltype(node->user_data)>(scratch_index);
} else if (output->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = static_cast<OpData*>(node->user_data);
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in OpData
size_t scratch_size = sizeof(int8_t*) * num_inputs;
TF_LITE_ENSURE_OK(
context, context->RequestScratchBufferInArena(context, scratch_size,
&data->scratch_index));
// 8bit -> 8bit general quantized path, with general rescalings
data->input_offset = -input_tensor_first->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = kAddNIntegerShift;
const double twice_max_input_scale =
2 * static_cast<double>(input_tensor_first->params.scale);
const double real_input_multiplier =
static_cast<double>(input_tensor_first->params.scale) /
twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input_multiplier, &data->input_multiplier, &data->input_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, kTfLiteActNone, output, &data->output_activation_min,
&data->output_activation_max));
} else {
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
@@ -72,12 +131,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
inline const T** CopyInputsToScratchBuffer(TfLiteContext* context,
TfLiteNode* node,
const int scratch_index) {
int num_inputs = NumInputs(node);
int scratch_index =
static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
void* scratch_buffer = context->GetScratchBuffer(context, scratch_index);
const T** all_inputs = static_cast<decltype(all_inputs)>(scratch_buffer);
for (int i = 0; i < num_inputs; i++) {
@@ -86,17 +143,56 @@ void EvalAddN(TfLiteContext* context, TfLiteNode* node,
all_inputs[i] = tflite::micro::GetTensorData<T>(next_input);
}
return all_inputs;
}
template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
int num_inputs = NumInputs(node);
int scratch_index =
static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
const T** all_inputs =
CopyInputsToScratchBuffer<T>(context, node, scratch_index);
reference_ops::AddN<T>(tflite::micro::GetTensorShape(output), num_inputs,
all_inputs, tflite::micro::GetTensorData<T>(output));
}
template <typename T>
void EvalAddNQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
int num_inputs = NumInputs(node);
OpData* data = static_cast<OpData*>(node->user_data);
const T** all_inputs =
CopyInputsToScratchBuffer<T>(context, node, data->scratch_index);
ArithmeticParams params;
params.left_shift = data->left_shift;
params.input1_offset = data->input_offset;
params.input1_multiplier = data->input_multiplier;
params.input1_shift = data->input_shift;
params.output_offset = data->output_offset;
params.output_multiplier = data->output_multiplier;
params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&params);
reference_ops::AddN(params, tflite::micro::GetTensorShape(output), num_inputs,
all_inputs, tflite::micro::GetTensorData<T>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalAddN<float>(context, node, output);
} else if (output->type == kTfLiteInt8) {
EvalAddNQuantized<int8_t>(context, node, output);
} else {
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}

View File

@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
@@ -22,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
/*
@@ -56,6 +55,11 @@ namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// Indices into the init flexbuffer's vector.
// The parameter's name is in the comment that follows.
// Elements in the vectors are ordered alphabetically by parameter name.
constexpr int kCyclesMaxIndex = 0; // 'cycles_max'
// TODO(b/149795762): Add this to TfLiteStatus enum.
constexpr TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
@@ -76,8 +80,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
if (buffer != nullptr && length > 0) {
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
op_data->cycles_max = m["cycles_max"].AsInt32();
tflite::FlexbufferWrapper wrapper(buffer_t, length);
op_data->cycles_max = wrapper.ElementAsInt32(kCyclesMaxIndex);
} else {
op_data->cycles_max = 0;
}
@@ -118,6 +122,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
// https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
output->dims->data[1] == 25 ||
(cb_prepare_count == 5 && output->dims->data[2] == 2 &&
output->dims->data[3] == 96)) {
op_data->cycles_max = 1;

View File

@@ -147,8 +147,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
TF_LITE_ENSURE(context,
input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
input_type == kTfLiteInt64);
input_type == kTfLiteInt8 || input_type == kTfLiteInt16 ||
input_type == kTfLiteInt32 || input_type == kTfLiteInt64);
// Output type must match input type
TF_LITE_ENSURE_EQ(context, output_type, input_type);
@@ -182,6 +182,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
switch (output_type) {  // Already know in/out types are same.
case kTfLiteFloat32:
case kTfLiteInt16:
case kTfLiteInt32:
case kTfLiteInt64: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
@@ -247,6 +248,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt64:
EvalUnquantized<int64_t>(context, node);
break;
case kTfLiteInt16:
EvalUnquantized<int16_t>(context, node);
break;
default:
TF_LITE_KERNEL_LOG(

View File

@@ -53,7 +53,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
@@ -70,6 +73,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
case kTfLiteInt16: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<std::int64_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
}
case kTfLiteInt8: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,

View File

@@ -72,6 +72,21 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_CONV_2D();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 inputs and outputs.
TfLiteRegistration Register_CONV_2D_INT8REF();
#else
inline TfLiteRegistration Register_CONV_2D_INT8REF() {
return Register_CONV_2D();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_

View File

@@ -111,8 +111,7 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}
@@ -155,7 +154,7 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);

View File

@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -59,36 +59,45 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
TfLiteRegistration registration,
uint8_t* output_data, float tolerance = 1e-5);
TfLiteStatus TestConvFloat(const int* input_dims_data, const float* input_data,
const int* filter_dims_data,
const float* filter_data, const int* bias_dims_data,
const float* bias_data, const int* output_dims_data,
TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data,
int* filter_dims_data, const float* filter_data,
int* bias_dims_data, const float* bias_data,
int* output_dims_data,
const float* expected_output_data,
TfLiteConvParams* conv_params,
TfLiteRegistration registration, float* output_data);
TfLiteStatus TestConvQuantizedPerLayer(
const int* input_dims_data, const float* input_data,
uint8_t* input_quantized, float input_scale, const int* filter_dims_data,
const float* filter_data, uint8_t* filter_quantized, float filter_scale,
const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized,
const int* output_dims_data, const float* expected_output_data,
uint8_t* expected_output_quantized, float output_scale,
TfLiteConvParams* conv_params, TfLiteRegistration registration,
uint8_t* output_data);
int* input_dims_data, const float* input_data, uint8_t* input_quantized,
float input_scale, int* filter_dims_data, const float* filter_data,
uint8_t* filter_quantized, float filter_scale, int* bias_dims_data,
const float* bias_data, int32_t* bias_quantized, int* output_dims_data,
const float* expected_output_data, uint8_t* expected_output_quantized,
float output_scale, TfLiteConvParams* conv_params,
TfLiteRegistration registration, uint8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
const int* input_dims_data, const float* input_data,
int8_t* input_quantized, float input_scale, int input_zero_point,
const int* filter_dims_data, const float* filter_data,
int8_t* filter_data_quantized, const int* bias_dims_data,
const float* bias_data, int32_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, const int* output_dims_data,
int* input_dims_data, const float* input_data, int8_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
float* bias_scales, int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int8_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
int* input_dims_data, const float* input_data, int16_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data,
std::int64_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int16_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int16_t* output_data);
} // namespace testing
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_

View File

@@ -0,0 +1,173 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/cumsum.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kAxisTensor = 1;
constexpr int kOutputTensor = 0;
constexpr int kCumSumIntegerShift = 20;
// only used with INT8 tensors
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t input_offset;
int32_t output_offset;
int32_t input_multiplier;
int32_t output_multiplier;
int input_shift;
int output_shift;
int left_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxisTensor);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, axis->type, kTfLiteInt32);
TF_LITE_ENSURE_EQ(context, NumElements(axis), 1);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
if (output->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = static_cast<OpData*>(node->user_data);
// 8bit -> 8bit general quantized path, with general rescalings
data->input_offset = -input->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = kCumSumIntegerShift;
const double twice_max_input_scale =
2 * static_cast<double>(input->params.scale);
const double real_input_multiplier =
static_cast<double>(input->params.scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input_multiplier, &data->input_multiplier, &data->input_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, kTfLiteActNone, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* axis_tensor =
tflite::micro::GetEvalInput(context, node, kAxisTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
auto* cs_params = static_cast<TfLiteCumsumParams*>(node->builtin_data);
auto input_shape = tflite::micro::GetTensorShape(input);
int32_t axis = *tflite::micro::GetTensorData<int32_t>(axis_tensor);
if (axis < 0) axis += input_shape.DimensionsCount();
if (axis < 0 || axis >= input_shape.DimensionsCount()) {
TF_LITE_KERNEL_LOG(context, "CUMSUM Invalid axis: %d", axis);
return kTfLiteError;
}
switch (input->type) {
case kTfLiteFloat32: {
reference_ops::CumSum(tflite::micro::GetTensorData<float>(input),
input_shape, axis, cs_params->exclusive,
cs_params->reverse,
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
auto* data = static_cast<OpData*>(node->user_data);
ArithmeticParams params;
params.left_shift = data->left_shift;
params.input1_offset = data->input_offset;
params.input1_multiplier = data->input_multiplier;
params.input1_shift = data->input_shift;
params.output_offset = data->output_offset;
params.output_multiplier = data->output_multiplier;
params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &params);
reference_ops::CumSum(params, tflite::micro::GetTensorData<int8_t>(input),
input_shape, axis, cs_params->exclusive,
cs_params->reverse,
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default: {
TF_LITE_KERNEL_LOG(context,
"CUMSUM only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
}
return kTfLiteError;
}
} // namespace
TfLiteRegistration Register_CUMSUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -0,0 +1,143 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// input/output tensor shape rank associations
constexpr int kBatchRank = 0;
constexpr int kHeightRank = 1;
constexpr int kWidthRank = 2;
constexpr int kDepthRank = 3;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
auto data_type = output->type;
TF_LITE_ENSURE(context,
data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
const int block_size = params->block_size;
TF_LITE_ENSURE(context, block_size > 0);
const int input_height = input->dims->data[kHeightRank];
const int input_width = input->dims->data[kWidthRank];
const int input_channels = input->dims->data[kDepthRank];
int output_height = input_height * block_size;
int output_width = input_width * block_size;
int output_channels = input_channels / block_size / block_size;
TF_LITE_ENSURE_EQ(context, input_height, output_height / block_size);
TF_LITE_ENSURE_EQ(context, input_width, output_width / block_size);
TF_LITE_ENSURE_EQ(context, input_channels,
output_channels * block_size * block_size);
// We must update the output tensor dimensions.
// The dims storage is expected to be the same area in memory
// for both TfLiteTensor and TfLiteEvalTensor. This is important
// because TfLiteTensor in the MicroInterpreter is a temporary
// allocation. For the KernelRunner interpreter, TfLiteEvalTensor
// is a temporary allocation. We must therefore relocate the dims
// from the FlatBuffer to the persistent storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = input->dims->data[kBatchRank];
output->dims->data[kHeightRank] = output_height;
output->dims->data[kWidthRank] = output_width;
output->dims->data[kDepthRank] = output_channels;
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::DepthToSpaceParams op_params;
op_params.block_size = static_cast<int32_t>(params->block_size);
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
reference_ops::DepthToSpace(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::DepthToSpace(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(
context, "DEPTH_TO_SPACE only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DEPTH_TO_SPACE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
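The shape checks in CalculateOpData above reduce to simple block_size arithmetic: height and width grow by block_size, channels shrink by block_size squared. A worked example with made-up dimensions, block_size = 2 and an input of 1 x 2 x 2 x 12, which becomes 1 x 4 x 4 x 3:
// Illustrative only (made-up dims): DEPTH_TO_SPACE shape arithmetic.
constexpr int kBlockSize = 2;
constexpr int kInH = 2, kInW = 2, kInC = 12;
constexpr int kOutH = kInH * kBlockSize;                 // 4
constexpr int kOutW = kInW * kBlockSize;                 // 4
constexpr int kOutC = kInC / (kBlockSize * kBlockSize);  // 3
static_assert(kInC == kOutC * kBlockSize * kBlockSize,
              "channels must divide evenly by block_size^2");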

View File

@@ -20,7 +20,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

View File

@@ -18,7 +18,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
@@ -113,8 +112,7 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}

View File

@@ -15,7 +15,6 @@ limitations under the License.
#include <numeric>
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -117,12 +116,11 @@ struct OpData {
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
OpData* op_data = nullptr;
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
op_data = reinterpret_cast<OpData*>(
context->AllocatePersistentBuffer(context, sizeof(OpData)));

View File

@@ -1,206 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/div.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
// Parameters used in the quantized paths where the output is 8bit
int32_t input1_zero_point;
int32_t input2_zero_point;
int32_t output_zero_point;
int32_t output_activation_min;
int32_t output_activation_max;
// Parameters used in all quantized paths
int32_t output_multiplier;
int output_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteDivParams* params, OpData* data) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
const double real_multiplier = static_cast<double>(
input1->params.scale / (input2->params.scale * output->params.scale));
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
auto* data = static_cast<OpData*>(node->user_data);
return CalculateOpData(context, node, params, data);
}
void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
#define TF_LITE_DIV(type, opname, data_type) \
data_type output_activation_min, output_activation_max; \
CalculateActivationRange(params->activation, &output_activation_min, \
&output_activation_max); \
SetActivationParams(output_activation_min, output_activation_max, \
&op_params); \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<data_type>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<data_type>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<data_type>(output))
bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (requires_broadcast) {
TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
} else {
TF_LITE_DIV(reference_ops, Div, float);
}
#undef TF_LITE_DIV
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDivParams* params, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
#define TF_LITE_DIV(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output))
if (input1->type == kTfLiteInt8 && input2->type == kTfLiteInt8 &&
output->type == kTfLiteInt8) {
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (requires_broadcast) {
TF_LITE_DIV(reference_ops, BroadcastDivSlow, int8_t);
} else {
TF_LITE_DIV(reference_ops, Div, int8_t);
}
#undef TF_LITE_DIV
} else {
TF_LITE_KERNEL_LOG(
context, "Unsupported combination of input and output types in DIV.");
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
auto* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalDiv(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context,
"DIV only supports FLOAT32, quantized INT8 "
"now, got type %s (%d).",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DIV() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
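A minimal standalone sketch (not part of the commit) of the requantization that CalculateOpData sets up above: dividing two int8 tensors element-wise needs a single real multiplier, input1_scale / (input2_scale * output_scale). The scales and zero points below are made-up example values, and plain float arithmetic stands in for the fixed-point multiplier/shift pair the kernel actually uses.

#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t QuantizedDiv(int8_t q1, int8_t q2) {
  const float s1 = 0.5f, s2 = 0.25f, s_out = 1.0f;  // example scales
  const int32_t z1 = 0, z2 = 0, z_out = 0;          // example zero points
  // Same factoring as CalculateOpData: one real multiplier for the whole op.
  const float real_multiplier = s1 / (s2 * s_out);
  const float real_result =
      real_multiplier * static_cast<float>(q1 - z1) / static_cast<float>(q2 - z2);
  long q = std::lround(real_result) + z_out;
  if (q > 127) q = 127;
  if (q < -128) q = -128;
  return static_cast<int8_t>(q);
}

int main() {
  // 12*0.5 = 6.0 divided by 6*0.25 = 1.5 gives 4.0, i.e. quantized 4 at scale 1.0.
  std::printf("%d\n", QuantizedDiv(12, 6));
}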

View File

@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
@@ -45,7 +46,10 @@ using TransformFunc = float (*)(float);
template <typename T>
void PopulateLookupTable(const TfLiteTensor* input, const TfLiteTensor* output,
const TransformFunc transform, OpData* data) {
if (sizeof(T) != 1) TF_LITE_FATAL("Lookup table valid only for 8bit");
if (sizeof(T) != 1) {
MicroPrintf("Lookup table valid only for 8bit");
TFLITE_ABORT;
}
const float inverse_scale = 1 / output->params.scale;
int32_t maxval = std::numeric_limits<T>::max();

View File

@@ -0,0 +1,130 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
return nullptr;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
template <typename T>
TfLiteStatus EvalFloorDiv(TfLiteContext* context,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
// Validate the denominator.
for (int i = 0; i < tflite::ElementCount(*input2->dims); ++i) {
if (std::equal_to<T>()(denominator_data[i], 0)) {
TF_LITE_KERNEL_LOG(context, "Division by 0");
return kTfLiteError;
}
}
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
if (requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteFloat32: {
return EvalFloorDiv<float>(context, input1, input2, output);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by FLOOR_DIV.",
TfLiteTypeGetName(input1->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_FLOOR_DIV() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
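A minimal standalone sketch (not part of the commit) of the element-wise operation reference_ops::FloorDiv applies, together with the zero-denominator check performed above; the input values are arbitrary examples.

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<float> num = {7.f, -7.f, 7.f};
  const std::vector<float> den = {2.f, 2.f, -2.f};
  for (int i = 0; i < static_cast<int>(num.size()); ++i) {
    if (den[i] == 0.f) {  // mirrors the "Division by 0" check above
      std::printf("Division by 0\n");
      return 1;
    }
    std::printf("floor(%g / %g) = %g\n", num[i], den[i], std::floor(num[i] / den[i]));
  }
  // Prints 3, -4, -4: the quotient is rounded toward negative infinity.
}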

View File

@@ -0,0 +1,128 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
// OLD-TODO(b/117523611): We should factor out a binary_op and put binary ops
// there.
namespace tflite {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
// OLD-TODO(b/117912880): Support quantization.
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
return nullptr;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
template <typename T>
TfLiteStatus EvalFloorMod(TfLiteContext* context, bool requires_broadcast,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
if (requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32: {
return EvalFloorMod<float>(context, requires_broadcast, input1, input2,
output);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by FLOOR_MOD.",
TfLiteTypeGetName(input1->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_FLOOR_MOD() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
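A minimal standalone sketch (not part of the commit) of floor-mod semantics, written as the mathematically equivalent x - floor(x/y)*y: unlike std::fmod, the remainder takes the sign of the divisor. Input values are arbitrary examples.

#include <cmath>
#include <cstdio>

float FloorMod(float x, float y) { return x - std::floor(x / y) * y; }

int main() {
  std::printf("%g vs fmod %g\n", FloorMod(7.f, 3.f), std::fmod(7.f, 3.f));    // 1 vs 1
  std::printf("%g vs fmod %g\n", FloorMod(-7.f, 3.f), std::fmod(-7.f, 3.f));  // 2 vs -1
  std::printf("%g vs fmod %g\n", FloorMod(7.f, -3.f), std::fmod(7.f, -3.f));  // -2 vs 1
}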

View File

@@ -109,19 +109,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
break;
}
case kTfLiteUInt8: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
}
default: {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

View File

@@ -65,7 +65,7 @@ TfLiteStatus CalculateOpDataFullyConnected(
// (reference or optimized) must define this function.
TfLiteRegistration Register_FULLY_CONNECTED();
#if defined(CMSIS_NN) || defined(ARDUINO)
#if defined(CMSIS_NN)
// The Arduino is a special case where we use the CMSIS kernels, but because of
// the current approach to building for Arduino, we do not support -DCMSIS_NN as
// part of the build. As a result, we use defined(ARDUINO) as proxy for the

View File

@@ -65,6 +65,11 @@ TfLiteStatus CalculateOpDataFullyConnected(
&data->output_shift);
data->input_zero_point = input->params.zero_point;
// Filter weights will always be symmetric quantized since we only support
// int8 quantization. See
// https://github.com/tensorflow/tensorflow/issues/44912 for additional
// context.
TFLITE_DCHECK(filter->params.zero_point == 0);
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
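A minimal standalone sketch (not part of the commit) of the symmetric int8 quantization assumed by the TFLITE_DCHECK above: with the zero point fixed at 0, a real value maps to round(value / scale) and real zero lands exactly on quantized zero. The scale below is an example value.

#include <cmath>
#include <cstdint>
#include <cstdio>

int8_t QuantizeSymmetric(float value, float scale) {
  long q = std::lround(value / scale);  // zero_point is fixed at 0
  if (q > 127) q = 127;                 // symmetric clamp range [-127, 127]
  if (q < -127) q = -127;
  return static_cast<int8_t>(q);
}

int main() {
  const float scale = 0.02f;  // example filter scale
  std::printf("%d %d %d\n", QuantizeSymmetric(0.f, scale),
              QuantizeSymmetric(0.5f, scale), QuantizeSymmetric(-1.f, scale));
  // 0 25 -50
}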

View File

@@ -0,0 +1,222 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kInputPositions = 1;
constexpr int kOutputTensor = 0;
template <typename InputT, typename CoordsT = int32_t>
TfLiteStatus Gather(const TfLiteGatherParams* params,
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* coords, TfLiteEvalTensor* output) {
const InputT* input_data = tflite::micro::GetTensorData<InputT>(input);
const CoordsT* coords_data = tflite::micro::GetTensorData<CoordsT>(coords);
InputT* output_data = tflite::micro::GetTensorData<InputT>(output);
const TfLiteIntArray* input_dims = input->dims;
const int input_dims_size = input_dims->size;
int axis = params->axis;
if (axis < 0) {
axis += input_dims_size;
}
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, input_dims_size);
int batch_dims = params->batch_dims;
// batch_dims should be in range: [-rank(coords), rank(coords)].
// Negative batch_dims is added with rank of coords.
const TfLiteIntArray* coords_dims = coords->dims;
const int coords_dims_size = coords_dims->size;
if (batch_dims < 0) {
batch_dims += coords_dims_size;
}
TFLITE_DCHECK_GE(batch_dims, 0);
TFLITE_DCHECK_LT(batch_dims, input_dims_size);
TFLITE_DCHECK_LE(batch_dims, coords_dims_size);
TFLITE_DCHECK_GE(axis, batch_dims);
for (int i = 0; i < batch_dims; ++i) {
TFLITE_DCHECK_EQ(input_dims->data[i], coords_dims->data[i]);
}
const int axis_size = input_dims->data[axis];
int batch_size = 1;
for (int i = 0; i < batch_dims; ++i) {
batch_size *= input_dims->data[i];
}
int outer_size = 1;
for (int i = batch_dims; i < axis; ++i) {
outer_size *= input_dims->data[i];
}
int inner_size = 1;
for (int i = axis + 1; i < input_dims_size; ++i) {
inner_size *= input_dims->data[i];
}
int coord_size = 1;
for (int i = batch_dims; i < coords_dims_size; ++i) {
coord_size *= coords_dims->data[i];
}
for (int batch = 0; batch < batch_size; ++batch) {
for (int outer = 0; outer < outer_size; ++outer) {
for (int coord = 0; coord < coord_size; ++coord) {
TFLITE_DCHECK_GE(coords_data[coord], 0);
TFLITE_DCHECK_LT(coords_data[coord], axis_size);
std::memcpy(output_data +
(((batch * outer_size) + outer) * coord_size + coord) *
inner_size,
input_data + (((batch * outer_size) + outer) * axis_size +
coords_data[batch * coord_size + coord]) *
inner_size,
sizeof(InputT) * inner_size);
}
}
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const auto* params =
reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
const TfLiteTensor* coords;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputPositions, &coords));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
switch (coords->type) {
case kTfLiteInt32:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Positions of type '%s' are not supported by gather.",
TfLiteTypeGetName(coords->type));
return kTfLiteError;
break;
}
// Assign to output the input type.
output->type = input->type;
// Check conditions for different types.
switch (input->type) {
case kTfLiteFloat32:
case kTfLiteInt8:
break;
default:
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by gather.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
break;
}
int axis = params->axis;
if (axis < 0) {
axis += NumDimensions(input);
}
TF_LITE_ENSURE(context, 0 <= axis && axis < NumDimensions(input));
int batch_dims = params->batch_dims;
// batch_dims should be in range: [-rank(coords), rank(coords)].
// Negative batch_dims is added with rank of coords.
if (batch_dims < 0) {
batch_dims += NumDimensions(coords);
}
TF_LITE_ENSURE(context, batch_dims <= axis);
TF_LITE_ENSURE(context, 0 <= batch_dims && batch_dims < NumDimensions(input));
TF_LITE_ENSURE(context, batch_dims <= NumDimensions(coords));
for (int i = 0; i < batch_dims; ++i) {
TF_LITE_ENSURE_EQ(context, input->dims->data[i], coords->dims->data[i]);
}
// GATHER updates the output tensor dimensions, but TfLiteTensor in the
// MicroInterpreter is a temporary allocation. We must therefore relocate the
// dims from the FlatBuffer to the persistent storage arena.

TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
TfLiteIntArray* output_shape = output->dims;
output_shape->size =
NumDimensions(input) + NumDimensions(coords) - 1 - batch_dims;
int output_index = 0;
for (int i = 0; i < axis; ++i) {
output_shape->data[output_index++] = input->dims->data[i];
}
for (int i = batch_dims; i < coords->dims->size; ++i) {
output_shape->data[output_index++] = coords->dims->data[i];
}
for (int i = axis + 1; i < input->dims->size; ++i) {
output_shape->data[output_index++] = input->dims->data[i];
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto* params =
reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* coords =
tflite::micro::GetEvalInput(context, node, kInputPositions);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (coords->type == kTfLiteInt32) {
switch (input->type) {
case kTfLiteFloat32:
return Gather<float, int32_t>(params, input, coords, output);
break;
case kTfLiteInt8:
return Gather<int8_t, int32_t>(params, input, coords, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by gather.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
break;
}
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_GATHER() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
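A minimal standalone sketch (not part of the commit) of the output-shape rule Prepare computes above, input.shape[:axis] + coords.shape[batch_dims:] + input.shape[axis+1:]; the shapes, axis, and batch_dims below are example values.

#include <cstdio>
#include <vector>

std::vector<int> GatherOutputShape(const std::vector<int>& input_shape,
                                   const std::vector<int>& coords_shape,
                                   int axis, int batch_dims) {
  std::vector<int> out;
  for (int i = 0; i < axis; ++i) out.push_back(input_shape[i]);
  for (int i = batch_dims; i < static_cast<int>(coords_shape.size()); ++i)
    out.push_back(coords_shape[i]);
  for (int i = axis + 1; i < static_cast<int>(input_shape.size()); ++i)
    out.push_back(input_shape[i]);
  return out;
}

int main() {
  // input [4, 10, 3], coords [6], axis = 1, batch_dims = 0  ->  output [4, 6, 3]
  for (int d : GatherOutputShape({4, 10, 3}, {6}, /*axis=*/1, /*batch_dims=*/0))
    std::printf("%d ", d);
  std::printf("\n");
}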

View File

@@ -0,0 +1,201 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kParams = 0;
constexpr int kIndices = 1;
constexpr int kOutputTensor = 0;
constexpr int MAX_INDICES_ND = 5;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* params;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kParams, &params));
const TfLiteTensor* indices;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kIndices, &indices));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
switch (params->type) {
case kTfLiteFloat32:
case kTfLiteInt8:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Params of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(params->type));
return kTfLiteError;
break;
}
switch (indices->type) {
case kTfLiteInt32:
break;
default:
TF_LITE_KERNEL_LOG(context,
"Indices of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(indices->type));
return kTfLiteError;
}
const int params_rank = NumDimensions(params);
const int indices_rank = NumDimensions(indices);
const int indices_nd = SizeOfDimension(indices, indices_rank - 1);
if (params_rank < 1) {
TF_LITE_KERNEL_LOG(context, "Params must be at least a vector.");
return kTfLiteError;
}
if (indices_rank < 1) {
TF_LITE_KERNEL_LOG(context, "Indices must be at least a vector.");
return kTfLiteError;
}
if (indices_nd > params_rank) {
TF_LITE_KERNEL_LOG(
context, "Index innermost dimension length must be <= params rank.");
return kTfLiteError;
}
if (indices_nd > MAX_INDICES_ND) {
TF_LITE_KERNEL_LOG(context,
"Index innermost dimension length must not exceed %d.",
MAX_INDICES_ND);
return kTfLiteError;
}
// Assign to output the input type.
output->type = params->type;
// TFLM gather_nd does not create the output tensor, but it needs to ensure
// that the output shape is correct. The result shape is
// indices.shape[:-1] + params.shape[indices.shape[-1]:]
TfLiteIntArray* output_shape = output->dims;
int output_index = 0;
for (int i = 0; i < indices_rank - 1; ++i) {
output_shape->data[output_index++] = indices->dims->data[i];
}
for (int i = indices_nd; i < params_rank; ++i) {
output_shape->data[output_index++] = params->dims->data[i];
}
output_shape->size = output_index;
return kTfLiteOk;
}
template <typename ParamsT, typename IndicesT>
TfLiteStatus GatherNd(const TfLiteEvalTensor* params,
const TfLiteEvalTensor* indices,
TfLiteEvalTensor* output) {
const int indices_dims = indices->dims->size;
const int indices_nd = indices->dims->data[indices_dims - 1];
const int params_dims = params->dims->size;
const IndicesT* index_data = tflite::micro::GetTensorData<IndicesT>(indices);
const ParamsT* param_data = tflite::micro::GetTensorData<ParamsT>(params);
ParamsT* output_data = tflite::micro::GetTensorData<ParamsT>(output);
int n_slices = 1;
for (int i = 0; i < indices_dims - 1; ++i) {
n_slices *= indices->dims->data[i];
}
// If indices[-1] == params.rank, fetch single elements.
// If indices[-1] < params.rank, fetch slices.
int slice_size = 1;
for (int i = indices_nd; i < params_dims; ++i) {
slice_size *= params->dims->data[i];
}
int remain_flat_size = ElementCount(*params->dims);
// Number of elements per dimension
int dims_to_count[MAX_INDICES_ND];
for (int i = 0; i < indices_nd; ++i) {
dims_to_count[i] = remain_flat_size / params->dims->data[i];
remain_flat_size = dims_to_count[i];
}
for (int i = 0; i < n_slices; ++i) {
int from_pos = 0;
for (int j = 0; j < indices_nd; ++j) {
int offset = i * indices_nd + j;
IndicesT index = index_data[offset];
from_pos += index * dims_to_count[j];
}
std::memcpy(output_data + i * slice_size, param_data + from_pos,
sizeof(ParamsT) * slice_size);
}
return kTfLiteOk;
}
template <typename IndicesT>
TfLiteStatus EvalGatherNd(TfLiteContext* context,
const TfLiteEvalTensor* params,
const TfLiteEvalTensor* indices,
TfLiteEvalTensor* output) {
switch (params->type) {
case kTfLiteFloat32:
return GatherNd<float, IndicesT>(params, indices, output);
break;
case kTfLiteInt8:
return GatherNd<int8_t, IndicesT>(params, indices, output);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Params type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(params->type));
return kTfLiteError;
}
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* params =
tflite::micro::GetEvalInput(context, node, kParams);
const TfLiteEvalTensor* indices =
tflite::micro::GetEvalInput(context, node, kIndices);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (indices->type) {
case kTfLiteInt32:
return EvalGatherNd<int32_t>(context, params, indices, output);
break;
default:
TF_LITE_KERNEL_LOG(context,
"Indices of type '%s' are not supported by gather_nd.",
TfLiteTypeGetName(indices->type));
return kTfLiteError;
}
}
} // namespace
TfLiteRegistration Register_GATHER_ND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
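A minimal standalone sketch (not part of the commit) of the slice lookup GatherNd performs above, on a small 2-D params tensor with made-up values; each innermost index vector selects one contiguous slice of params.

#include <cstdio>
#include <vector>

int main() {
  // params shape [3, 2], values made up for illustration.
  const std::vector<float> params = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
  const int slice_size = 2;                 // params.shape[indices_nd:] == [2]
  // indices shape [2, 1]: each length-1 index vector picks one row of params.
  const std::vector<int> indices = {2, 0};  // result shape [2, 2]
  for (int i = 0; i < static_cast<int>(indices.size()); ++i) {
    const int from_pos = indices[i] * slice_size;  // dims_to_count[0] == slice_size
    std::printf("[%g, %g] ", params[from_pos], params[from_pos + 1]);
  }
  std::printf("\n");  // [5, 6] [1, 2]
}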

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -23,72 +23,23 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace hard_swish {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
namespace {
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
return kTfLiteOk;
}
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kHardSwishInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kHardSwishOutputTensor);
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
switch (input->type) {
@@ -99,13 +50,6 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} break;
case kTfLiteUInt8: {
tflite::reference_ops::HardSwish<uint8_t>(
*params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} break;
case kTfLiteInt8: {
tflite::reference_ops::HardSwish<int8_t>(
*params, tflite::micro::GetTensorShape(input),
@@ -114,29 +58,24 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorData<int8_t>(output));
} break;
default: {
TF_LITE_KERNEL_LOG(
context,
"Only float32/int8_t/uint8_t are supported currently, got %s",
TfLiteTypeGetName(input->type));
MicroPrintf("Unsupported type %s", TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace hard_swish
} // namespace
TfLiteRegistration Register_HARD_SWISH() {
return {/*init=*/hard_swish::HardSwishInit,
return {/*init=*/HardSwishInit,
/*free=*/nullptr,
/*prepare=*/hard_swish::HardSwishPrepare,
/*invoke=*/hard_swish::HardSwishEval,
/*prepare=*/tflite::HardSwishPrepare,
/*invoke=*/HardSwishEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,30 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kHardSwishInputTensor;
extern const int kHardSwishOutputTensor;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_

View File

@@ -0,0 +1,79 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kHardSwishInputTensor = 0;
const int kHardSwishOutputTensor = 0;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kHardSwishInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kHardSwishOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
return kTfLiteOk;
}
} // namespace tflite
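A minimal standalone sketch (not part of the commit) of the float hard-swish function whose fixed-point parameters HardSwishPrepare derives above, hard_swish(x) = x * relu6(x + 3) / 6; the sample inputs are arbitrary.

#include <algorithm>
#include <cstdio>

float HardSwish(float x) {
  const float relu6 = std::min(6.f, std::max(0.f, x + 3.f));
  return x * relu6 / 6.f;
}

int main() {
  for (float x : {-4.f, -1.f, 0.f, 1.f, 4.f})
    std::printf("hard_swish(%g) = %g\n", x, HardSwish(x));
  // -4 -> 0, -1 -> -0.333..., 0 -> 0, 1 -> 0.666..., 4 -> 4
}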

View File

@@ -0,0 +1,166 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
struct OpData {
int then_subgraph_index;
int else_subgraph_index;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const auto* params =
reinterpret_cast<const TfLiteIfParams*>(node->builtin_data);
op_data->then_subgraph_index = params->then_subgraph_index;
op_data->else_subgraph_index = params->else_subgraph_index;
TF_LITE_ENSURE(context, node->inputs->size > 0);
// The first input is the condition.
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);
// The first input of the node is the condition. The rest of inputs are
// passed to the branch subgraphs. Therefore, the number of subgraph inputs
// will be the number of node inputs - 1.
size_t num_inputs = node->inputs->size - 1;
size_t num_outputs = node->outputs->size;
  // Casting to TfLiteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
TF_LITE_ENSURE(context,
op_data->then_subgraph_index < graph_info->NumSubgraphs());
TF_LITE_ENSURE(context,
op_data->else_subgraph_index < graph_info->NumSubgraphs());
TF_LITE_ENSURE_EQ(
context, num_inputs,
graph_info->NumSubgraphInputs(op_data->then_subgraph_index));
TF_LITE_ENSURE_EQ(
context, num_outputs,
graph_info->NumSubgraphOutputs(op_data->then_subgraph_index));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* cond;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &cond));
bool cond_value = cond->data.b[0];
  // Casting to TfLiteIntArray is required since we are re-using
// GetExecutionPlan from TfLiteContext. On TFLM this method returns a
// MicroGraph.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
MicroGraph* graph_info;
context->GetExecutionPlan(context,
reinterpret_cast<TfLiteIntArray**>(&graph_info));
// Currently we copy the input / output between the subgraphs. This isn't
// optimized yet.
int active_branch_subgraph_index =
cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index;
for (size_t i = 0;
i < graph_info->NumSubgraphInputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, i + 1);
TfLiteEvalTensor* subgraph_input =
graph_info->GetSubgraphInput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t input_bytes;
size_t subgraph_input_bytes;
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(input, &input_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_input, &subgraph_input_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, subgraph_input->type);
TF_LITE_ENSURE_EQ(context, input_bytes, subgraph_input_bytes);
memcpy(subgraph_input->data.raw, input->data.raw, input_bytes);
}
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(active_branch_subgraph_index));
for (size_t i = 0;
i < graph_info->NumSubgraphOutputs(active_branch_subgraph_index); ++i) {
const TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, i);
TfLiteEvalTensor* subgraph_output =
graph_info->GetSubgraphOutput(active_branch_subgraph_index, i);
// These checks must occur in Eval since TfLiteEvalTensors are not available
// during Prepare.
size_t output_bytes;
size_t subgraph_output_bytes;
TF_LITE_ENSURE_OK(context,
TfLiteEvalTensorByteLength(output, &output_bytes));
TF_LITE_ENSURE_OK(context, TfLiteEvalTensorByteLength(
subgraph_output, &subgraph_output_bytes));
TF_LITE_ENSURE_TYPES_EQ(context, output->type, subgraph_output->type);
TF_LITE_ENSURE_EQ(context, output_bytes, subgraph_output_bytes);
memcpy(output->data.raw, subgraph_output->data.raw, output_bytes);
}
return kTfLiteOk;
}
} // namespace.
TfLiteRegistration Register_IF() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
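A minimal standalone sketch (not part of the commit) of the control flow Eval implements above, reduced to plain C++ with std::function branches standing in for subgraphs; the actual kernel additionally byte-copies tensors into and out of the selected subgraph.

#include <cstdio>
#include <functional>
#include <vector>

using Subgraph = std::function<std::vector<float>(const std::vector<float>&)>;

std::vector<float> EvalIf(bool cond, const std::vector<float>& inputs,
                          const Subgraph& then_branch, const Subgraph& else_branch) {
  const Subgraph& active = cond ? then_branch : else_branch;
  return active(inputs);
}

int main() {
  Subgraph add_one = [](const std::vector<float>& in) {
    std::vector<float> out(in);
    for (float& v : out) v += 1.f;
    return out;
  };
  Subgraph negate = [](const std::vector<float>& in) {
    std::vector<float> out(in);
    for (float& v : out) v = -v;
    return out;
  };
  for (float v : EvalIf(/*cond=*/false, {1.f, 2.f}, add_one, negate))
    std::printf("%g ", v);  // -1 -2
  std::printf("\n");
}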

View File

@@ -16,6 +16,8 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/micro/test_helpers.h"
namespace tflite {
namespace micro {
@@ -37,7 +39,8 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
kKernelRunnerBuffer_,
kKernelRunnerBufferSize_)),
registration_(registration),
tensors_(tensors) {
tensors_(tensors),
mock_micro_graph_(allocator_) {
// Prepare TfLiteContext:
context_.impl_ = static_cast<void*>(this);
context_.ReportError = ReportOpError;
@@ -47,6 +50,8 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration,
context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
context_.GetScratchBuffer = GetScratchBuffer;
context_.GetExecutionPlan = GetGraph;
context_.recommended_num_threads = 0;
// Prepare TfLiteNode:
node_.inputs = inputs;
@@ -157,5 +162,15 @@ void KernelRunner::ReportOpError(struct TfLiteContext* context,
va_end(args);
}
TfLiteStatus KernelRunner::GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args) {
TFLITE_DCHECK(context != nullptr);
KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
TFLITE_DCHECK(runner != nullptr);
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
*args = reinterpret_cast<TfLiteIntArray*>(runner->GetMockGraph());
return kTfLiteOk;
}
} // namespace micro
} // namespace tflite

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/mock_micro_graph.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
namespace tflite {
@@ -45,6 +46,10 @@ class KernelRunner {
// passed into the constructor of this class.
TfLiteStatus Invoke();
// Returns a pointer to the internal MockMicroGraph which KernelRunner uses
// to stub out MicroGraph methods and track invocations on each subgraph.
MockMicroGraph* GetMockGraph() { return &mock_micro_graph_; }
protected:
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
int tensor_index);
@@ -57,6 +62,11 @@ class KernelRunner {
static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
static void ReportOpError(struct TfLiteContext* context, const char* format,
...);
// This method matches GetExecutionPlan from TfLiteContext since TFLM reuses
// this method to get the MicroGraph from an operator context.
// TODO(b/188226309): Design a cleaner way to get a graph from kernel context.
static TfLiteStatus GetGraph(struct TfLiteContext* context,
TfLiteIntArray** args);
private:
static constexpr int kNumScratchBuffers_ = 12;
@@ -67,6 +77,7 @@ class KernelRunner {
SimpleMemoryAllocator* allocator_ = nullptr;
const TfLiteRegistration& registration_;
TfLiteTensor* tensors_ = nullptr;
MockMicroGraph mock_micro_graph_;
TfLiteContext context_ = {};
TfLiteNode node_ = {};

View File

@@ -49,5 +49,30 @@ PaddingType RuntimePaddingType(TfLitePadding padding) {
}
}
// Relocate tensor dims from FlatBuffer to the persistent storage arena.
// The old dims data is copied to the new storage area.
// The tensor and eval_tensor must be the same tensor.
// Only use during Prepare phase.
TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
TfLiteTensor* tensor,
TfLiteEvalTensor* eval_tensor) {
TF_LITE_ENSURE(context, tensor != nullptr);
TF_LITE_ENSURE(context, eval_tensor != nullptr);
TF_LITE_ENSURE(context, context->AllocatePersistentBuffer != nullptr);
int ranks = tensor->dims->size;
size_t alloc_size = TfLiteIntArrayGetSizeInBytes(ranks);
TfLiteIntArray* new_dims = static_cast<TfLiteIntArray*>(
context->AllocatePersistentBuffer(context, alloc_size));
TfLiteIntArray* old_dims = tensor->dims;
new_dims->size = ranks;
tensor->dims = new_dims;
eval_tensor->dims = new_dims;
for (int i = 0; i < ranks; i++) {
new_dims->data[i] = old_dims->data[i];
}
return kTfLiteOk;
}
} // namespace micro
} // namespace tflite
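A minimal standalone sketch (not part of the commit) of why the copy above is needed: the FlatBuffer-backed dims are read-only and the TfLiteTensor wrapper is temporary, so a kernel that wants to rewrite its output shape must first clone the dims into memory that outlives Prepare. The IntArray struct and static arena below are simplified stand-ins for TfLiteIntArray and AllocatePersistentBuffer.

#include <cstdio>
#include <cstring>

struct IntArray { int size; int data[8]; };  // simplified stand-in for TfLiteIntArray

alignas(IntArray) static unsigned char g_arena[256];  // stand-in for the persistent arena
static unsigned g_arena_used = 0;

IntArray* CloneDimsIntoArena(const IntArray* old_dims) {
  IntArray* new_dims = reinterpret_cast<IntArray*>(g_arena + g_arena_used);
  g_arena_used += sizeof(IntArray);
  new_dims->size = old_dims->size;
  std::memcpy(new_dims->data, old_dims->data, sizeof(int) * old_dims->size);
  return new_dims;  // writable, and it outlives the Prepare call
}

int main() {
  const IntArray flatbuffer_dims = {3, {4, 10, 3}};  // imagine this lives in the model
  IntArray* dims = CloneDimsIntoArena(&flatbuffer_dims);
  dims->data[1] = 6;  // the kernel may now legally rewrite the output shape
  std::printf("[%d, %d, %d]\n", dims->data[0], dims->data[1], dims->data[2]);
}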

View File

@@ -72,6 +72,14 @@ bool HaveSameShapes(const TfLiteEvalTensor* input1,
PaddingType RuntimePaddingType(TfLitePadding padding);
// Relocate tensor dims from FlatBuffer to the persistent storage arena.
// The old dims data is copied to the new storage area.
// The tensor and eval_tensor must be the same tensor.
// Only use during Prepare phase.
TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
TfLiteTensor* tensor,
TfLiteEvalTensor* eval_tensor);
} // namespace micro
} // namespace tflite

View File

@@ -70,7 +70,13 @@ TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
// The dims storage is expected to be the same area in memory
// for both TfLiteTensor and TfLiteEvalTensor. This is important
// because TfLiteTensor in the MicroInterpreter is a temporary
// allocation.
// allocation. For the KernelRunner interpreter, TfLiteEvalTensor
// is a temporary allocation. We must therefore relocate the dims
// from the FlatBuffer to the persistent storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = batches;
output->dims->data[kHeightRank] = out_height;
output->dims->data[kWidthRank] = out_width;

View File

@@ -67,8 +67,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
data->input_zero_point = 0;
}
// TODO(ahentz): For some reason our implementations don't support
// activations.
// Our implementations don't currently support activations.
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
return kTfLiteOk;

View File

@@ -68,7 +68,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
LeakyReluOpData* data = static_cast<LeakyReluOpData*>(node->user_data);
const auto* params =
static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
@@ -127,6 +127,10 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
QuantizeLeakyRelu<int8_t>(data, input, output);
return kTfLiteOk;
} break;
case kTfLiteInt16: {
QuantizeLeakyRelu<int16_t>(data, input, output);
return kTfLiteOk;
} break;
default:
TF_LITE_KERNEL_LOG(
context, "Only float32, int8 are supported by LEAKY_RELU, got %s.",

View File

@@ -0,0 +1,150 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
// used only with quantized data
struct LogSoftmaxOpData {
int32_t input_multiplier;
int32_t input_left_shift;
int32_t reverse_scaling_divisor;
int32_t reverse_scaling_right_shift;
int diff_min;
size_t outer_size; // number of tensor elements skipping computation axis
size_t depth; // number of tensor elements on computation axis
};
// input/output tensor index
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
if (input->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(LogSoftmaxOpData));
auto data = static_cast<LogSoftmaxOpData*>(node->user_data);
// quantization datum
constexpr int32_t kOutputZeroPoint = 127;
constexpr float kOutputScale = 16.0 / 256;
constexpr double kBeta = 1.0;
constexpr int kScaledDiffIntegerBits = 5;
TF_LITE_ENSURE(context, output->params.scale == kOutputScale);
TF_LITE_ENSURE(context, output->params.zero_point == kOutputZeroPoint);
int input_left_shift;
int reverse_scaling_right_shift;
tflite::PreprocessLogSoftmaxScalingExp(
kBeta, static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&data->input_multiplier, &input_left_shift,
&data->reverse_scaling_divisor, &reverse_scaling_right_shift);
data->input_left_shift = static_cast<int32_t>(input_left_shift);
data->reverse_scaling_right_shift =
static_cast<int32_t>(-reverse_scaling_right_shift);
// diff_min has a negative value, and is used to limit the maximum magnitude
// of the diffs, which are <= 0.
data->diff_min =
-tflite::CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
RuntimeShape input_shape = GetTensorShape(input);
const int trailing_dim = input_shape.DimensionsCount() - 1;
data->outer_size =
static_cast<size_t>(FlatSizeSkipDim(input_shape, trailing_dim));
data->depth = static_cast<size_t>(input_shape.Dims(trailing_dim));
}
return kTfLiteOk;
}
TfLiteStatus LogSoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
const LogSoftmaxOpData* data =
static_cast<LogSoftmaxOpData*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
SoftmaxParams op_params = {};
reference_ops::LogSoftmax(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
SoftmaxParams op_params = {};
op_params.input_multiplier = data->input_multiplier;
op_params.input_left_shift = data->input_left_shift;
op_params.reverse_scaling_divisor = data->reverse_scaling_divisor;
op_params.reverse_scaling_right_shift = data->reverse_scaling_right_shift;
op_params.diff_min = data->diff_min;
reference_ops::LogSoftmax(op_params, data->outer_size, data->depth,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default:
TF_LITE_KERNEL_LOG(context,
"LOG_SOFTMAX only supports float32, int8, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
} // namespace
TfLiteRegistration Register_LOG_SOFTMAX() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/LogSoftmaxPrepare,
/*invoke=*/LogSoftmaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
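A minimal standalone sketch (not part of the commit) of the float log-softmax computed by reference_ops::LogSoftmax above; because every output is <= 0, the int8 path can pin the output quantization to scale 16/256 and zero point 127. Sample inputs are arbitrary.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

std::vector<float> LogSoftmax(const std::vector<float>& x) {
  float max_val = x[0];
  for (float v : x) max_val = std::max(max_val, v);
  float sum_exp = 0.f;
  for (float v : x) sum_exp += std::exp(v - max_val);  // max-shifted for stability
  const float log_sum = std::log(sum_exp);
  std::vector<float> y(x.size());
  for (int i = 0; i < static_cast<int>(x.size()); ++i) y[i] = x[i] - max_val - log_sum;
  return y;
}

int main() {
  for (float v : LogSoftmax({1.f, 2.f, 3.f})) std::printf("%g ", v);
  std::printf("\n");  // approx -2.408 -1.408 -0.408
}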

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/logical.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
@@ -19,60 +21,17 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace logical {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
bool LogicalOr(bool x, bool y) { return x || y; }
TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalOr);
}
bool LogicalAnd(bool x, bool y) { return x && y; }
TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalAnd);
}
} // namespace
} // namespace logical
TfLiteRegistration Register_LOGICAL_OR() {
// Init, Free, Prepare, Eval are satisfying the Interface required by
@@ -80,7 +39,7 @@ TfLiteRegistration Register_LOGICAL_OR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalOrEval,
/*invoke=*/LogicalOrEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -93,13 +52,11 @@ TfLiteRegistration Register_LOGICAL_AND() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/nullptr,
/*invoke=*/logical::LogicalAndEval,
/*invoke=*/LogicalAndEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,35 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Input/output tensor index.
extern const int kLogicalInputTensor1;
extern const int kLogicalInputTensor2;
extern const int kLogicalOutputTensor;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool));
bool LogicalOr(bool x, bool y);
bool LogicalAnd(bool x, bool y);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_

View File

@@ -0,0 +1,63 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logical.h"
namespace tflite {
// Input/output tensor index.
const int kLogicalInputTensor1 = 0;
const int kLogicalInputTensor2 = 1;
const int kLogicalOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kLogicalInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kLogicalInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kLogicalOutputTensor);
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
bool LogicalOr(bool x, bool y) { return x || y; }
bool LogicalAnd(bool x, bool y) { return x && y; }
} // namespace tflite
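
(Side note, not part of the diff: LogicalImpl, both in the old logical.cc above and in the new logical_common.cc, takes the elementwise path when the two inputs share a shape and falls back to the slow 4-D broadcast path otherwise. Below is a minimal standalone C++ sketch of that dispatch pattern, with plain std::vector<bool> standing in for TfLiteEvalTensor and only a scalar broadcast case; names and values are illustrative only.)

// Illustrative sketch only, not part of the diff.
#include <cassert>
#include <iostream>
#include <vector>

bool LogicalOr(bool x, bool y) { return x || y; }
bool LogicalAnd(bool x, bool y) { return x && y; }

// Elementwise path: both inputs have the same shape.
std::vector<bool> BinaryFunction(const std::vector<bool>& in1,
                                 const std::vector<bool>& in2,
                                 bool (*func)(bool, bool)) {
  assert(in1.size() == in2.size());
  std::vector<bool> out(in1.size());
  for (size_t i = 0; i < in1.size(); ++i) out[i] = func(in1[i], in2[i]);
  return out;
}

// Broadcast path, reduced to the scalar-vs-vector case; the real kernel
// uses BroadcastBinaryFunction4DSlow for general 4-D broadcasting.
std::vector<bool> BroadcastScalar(bool scalar, const std::vector<bool>& in,
                                  bool (*func)(bool, bool)) {
  std::vector<bool> out(in.size());
  for (size_t i = 0; i < in.size(); ++i) out[i] = func(scalar, in[i]);
  return out;
}

int main() {
  std::vector<bool> a = {true, false, true};
  std::vector<bool> b = {false, false, true};
  // Same shape: mirrors the HaveSameShapes() branch.
  for (bool v : BinaryFunction(a, b, LogicalOr)) std::cout << v << ' ';
  std::cout << '\n';
  // Shape mismatch: mirrors the broadcast branch.
  for (bool v : BroadcastScalar(true, b, LogicalAnd)) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}

(Moving the shared pieces into logical.h / logical_common.cc is what lets Register_LOGICAL_OR and Register_LOGICAL_AND above refer to LogicalOrEval and LogicalAndEval without the old ops::micro::logical namespace.)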

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -24,71 +24,24 @@ limitations under the License.
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
OpData* data) {
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
} // namespace
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
return CalculateArithmeticOpData(context, node, data);
return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic));
}
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kLogisticInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
@@ -133,18 +86,16 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
} // namespace activations
} // namespace
TfLiteRegistration Register_LOGISTIC() {
return {/*init=*/activations::LogisticInit,
return {/*init=*/LogisticInit,
/*free=*/nullptr,
/*prepare=*/activations::LogisticPrepare,
/*invoke=*/activations::LogisticEval,
/*prepare=*/LogisticPrepare,
/*invoke=*/LogisticEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,42 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kLogisticInputTensor;
extern const int kLogisticOutputTensor;
struct OpDataLogistic {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data);
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_

View File

@@ -0,0 +1,68 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
namespace tflite {
const int kLogisticInputTensor = 0;
const int kLogisticOutputTensor = 0;
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data) {
const TfLiteTensor* input = GetInput(context, node, kLogisticInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kLogisticOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
return kTfLiteOk;
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
return CalculateArithmeticOpDataLogistic(context, node, data);
}
} // namespace tflite
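
(Side note, not part of the diff: CalculateArithmeticOpDataLogistic folds the int8 input scale into a Q31 fixed-point multiplier plus a left shift via std::frexp. A self-contained sketch of that decomposition follows; the input scale is an assumed value chosen for illustration.)

// Illustrative sketch only, not part of the diff.
#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
  const double input_scale = 0.0078125;   // assumed value (1/128)
  constexpr int kInputIntegerBits = 4;    // same constant as the kernel

  const double input_real_multiplier =
      input_scale * static_cast<double>(1 << (31 - kInputIntegerBits));

  // Split into a mantissa q in [0.5, 1) and a power-of-two exponent.
  int input_left_shift = 0;
  const double q = std::frexp(input_real_multiplier, &input_left_shift);
  const int32_t input_multiplier =
      static_cast<int32_t>(std::round(q * (1ll << 31)));

  // Conceptually the kernel later applies multiplier * 2^(left_shift - 31).
  const double reconstructed = static_cast<double>(input_multiplier) *
                               std::ldexp(1.0, input_left_shift - 31);

  std::cout << "multiplier = " << input_multiplier
            << ", left_shift = " << input_left_shift
            << ", reconstructed = " << reconstructed
            << " (target " << input_real_multiplier << ")\n";
  return 0;
}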

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -32,23 +32,40 @@ namespace tflite {
// have their Register function declarations in the tflite namespace.
TfLiteRegistration Register_ADD_N();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_BATCH_TO_SPACE_ND();
TfLiteRegistration Register_CAST();
TfLiteRegistration Register_CONV_2D();
TfLiteRegistration Register_CUMSUM();
TfLiteRegistration Register_DEPTH_TO_SPACE();
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
TfLiteRegistration Register_DIV();
TfLiteRegistration Register_ELU();
TfLiteRegistration Register_EXP();
TfLiteRegistration Register_EXPAND_DIMS();
TfLiteRegistration Register_FILL();
TfLiteRegistration Register_FLOOR_DIV();
TfLiteRegistration Register_FLOOR_MOD();
TfLiteRegistration Register_GATHER();
TfLiteRegistration Register_GATHER_ND();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_IF();
TfLiteRegistration Register_L2_POOL_2D();
TfLiteRegistration Register_LEAKY_RELU();
TfLiteRegistration Register_LOG_SOFTMAX();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESIZE_BILINEAR();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPACE_TO_BATCH_ND();
TfLiteRegistration Register_SPACE_TO_DEPTH();
TfLiteRegistration Register_SQUEEZE();
TfLiteRegistration Register_SVDF();
TfLiteRegistration Register_TRANSPOSE();
TfLiteRegistration Register_TRANSPOSE_CONV();
TfLiteRegistration Register_ZEROS_LIKE();
@@ -59,7 +76,6 @@ TfLiteRegistration Register_ABS();
TfLiteRegistration Register_ADD();
TfLiteRegistration Register_ARG_MAX();
TfLiteRegistration Register_ARG_MIN();
TfLiteRegistration Register_AVERAGE_POOL_2D();
TfLiteRegistration Register_CEIL();
// TODO(b/160234179): Change custom OPs to also return by value.
TfLiteRegistration* Register_CIRCULAR_BUFFER();
@@ -70,16 +86,11 @@ TfLiteRegistration Register_EQUAL();
TfLiteRegistration Register_FLOOR();
TfLiteRegistration Register_GREATER();
TfLiteRegistration Register_GREATER_EQUAL();
TfLiteRegistration Register_HARD_SWISH();
TfLiteRegistration Register_LESS();
TfLiteRegistration Register_LESS_EQUAL();
TfLiteRegistration Register_LOG();
TfLiteRegistration Register_LOGICAL_AND();
TfLiteRegistration Register_LOGICAL_NOT();
TfLiteRegistration Register_LOGICAL_OR();
TfLiteRegistration Register_LOGISTIC();
TfLiteRegistration Register_MAXIMUM();
TfLiteRegistration Register_MAX_POOL_2D();
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_MINIMUM();
TfLiteRegistration Register_MUL();
@@ -90,8 +101,6 @@ TfLiteRegistration Register_PAD();
TfLiteRegistration Register_PADV2();
TfLiteRegistration Register_PRELU();
TfLiteRegistration Register_REDUCE_MAX();
TfLiteRegistration Register_RELU();
TfLiteRegistration Register_RELU6();
TfLiteRegistration Register_RESHAPE();
TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
TfLiteRegistration Register_ROUND();

View File

@@ -62,7 +62,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
@@ -104,7 +104,6 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
@@ -114,33 +113,13 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
reference_integer_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else if (output->type == kTfLiteUInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
}
}
}
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
@@ -203,7 +182,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteUInt8:
case kTfLiteInt8:
EvalQuantized(context, node, data, input1, input2, output);
break;
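
(Side note, not part of the diff: with the uint8 branches removed, the int8 Mul path rescales the zero-point-corrected integer product by input1_scale * input2_scale / output_scale. Below is a floating-point reference of that arithmetic; the kernel uses a precomputed fixed-point multiplier instead, and all quantization parameters here are made-up values.)

// Illustrative sketch only, not part of the diff.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int8_t QuantizedMulReference(int8_t q1, int8_t q2, float s1, int32_t zp1,
                             float s2, int32_t zp2, float s_out,
                             int32_t zp_out) {
  // Integer product of the zero-point-corrected inputs.
  const int32_t raw = (static_cast<int32_t>(q1) - zp1) *
                      (static_cast<int32_t>(q2) - zp2);
  // Rescale into the output quantization; a float reference is enough here.
  const float real_multiplier = (s1 * s2) / s_out;
  const int32_t rescaled =
      static_cast<int32_t>(std::lround(raw * real_multiplier)) + zp_out;
  // Clamp to the int8 range (activation_min/max in the kernel).
  return static_cast<int8_t>(
      std::min<int32_t>(127, std::max<int32_t>(-128, rescaled)));
}

int main() {
  // 40 * 0.05 = 2.0 and -20 * 0.10 = -2.0, so the product -4.0 maps to -80
  // at an output scale of 0.05 (all parameters assumed).
  const int8_t out = QuantizedMulReference(40, -20, 0.05f, 0, 0.10f, 0,
                                           0.05f, 0);
  std::cout << "quantized product = " << static_cast<int>(out) << '\n';
  return 0;
}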

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,163 +15,34 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
namespace tflite {
namespace ops {
namespace micro {
namespace pooling {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
struct OpData {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpData(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output, OpData* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::AveragePool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpData* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
if (input->type == kTfLiteUInt8) {
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
} else {
reference_integer_ops::MaxPool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
}
} // namespace
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AverageEvalFloat(context, node, params, data, input, output);
AveragePoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
AverageEvalQuantized(context, node, params, data, input, output);
AveragePoolingEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
@@ -186,20 +57,20 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpData* data = static_cast<const OpData*>(node->user_data);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
switch (input->type) {
case kTfLiteFloat32:
MaxEvalFloat(context, node, params, data, input, output);
MaxPoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteUInt8:
case kTfLiteInt8:
MaxEvalQuantized(context, node, params, data, input, output);
MaxPoolingEvalQuantized(context, node, params, data, input, output);
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
@@ -211,42 +82,16 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
} // namespace pooling
} // namespace
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return {/*init=*/pooling::Init,
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::AverageEval,
/*prepare=*/PoolingPrepare,
/*invoke=*/AverageEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -254,16 +99,14 @@ TfLiteRegistration Register_AVERAGE_POOL_2D() {
}
TfLiteRegistration Register_MAX_POOL_2D() {
return {/*init=*/pooling::Init,
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/pooling::Prepare,
/*invoke=*/pooling::MaxEval,
/*prepare=*/PoolingPrepare,
/*invoke=*/MaxEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,71 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kPoolingInputTensor;
extern const int kPoolingOutputTensor;
struct OpDataPooling {
TfLitePaddingValues padding;
int32_t activation_min;
int32_t activation_max;
float activation_min_f32;
float activation_max_f32;
};
TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output,
OpDataPooling* data);
TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node);
void AveragePoolingEvalFloat(const TfLiteContext* context,
const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_

View File

@@ -0,0 +1,163 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
namespace tflite {
const int kPoolingInputTensor = 0;
const int kPoolingOutputTensor = 0;
TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output,
OpDataPooling* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kPoolingInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kPoolingOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
CalculateOpDataPooling(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
return kTfLiteOk;
}
void AveragePoolingEvalFloat(const TfLiteContext* context,
const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
reference_integer_ops::AveragePool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
reference_integer_ops::MaxPool(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} // namespace tflite
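
(Side note, not part of the diff: CalculateOpDataPooling delegates the geometry to ComputePaddingHeightWidth. The following standalone one-dimensional sketch shows the usual SAME/VALID padding formulas behind it; it is not the library routine.)

// Illustrative sketch only, not part of the diff.
#include <algorithm>
#include <iostream>

struct Padding1D {
  int output_size;
  int pad_before;  // corresponds to TfLitePaddingValues height/width
  int pad_after;
};

Padding1D ComputeSamePadding1D(int in_size, int filter, int stride) {
  // SAME padding keeps ceil(in_size / stride) output positions.
  const int out = (in_size + stride - 1) / stride;
  const int needed = std::max(0, (out - 1) * stride + filter - in_size);
  return {out, needed / 2, needed - needed / 2};
}

Padding1D ComputeValidPadding1D(int in_size, int filter, int stride) {
  // VALID padding: the filter must fit entirely inside the input.
  return {(in_size - filter) / stride + 1, 0, 0};
}

int main() {
  // Example: input width 7, 3-wide pooling window, stride 2 (assumed values).
  const Padding1D same = ComputeSamePadding1D(7, 3, 2);
  const Padding1D valid = ComputeValidPadding1D(7, 3, 2);
  std::cout << "SAME : out=" << same.output_size << " pad=" << same.pad_before
            << "/" << same.pad_after << '\n';
  std::cout << "VALID: out=" << valid.output_size << " pad="
            << valid.pad_before << "/" << valid.pad_after << '\n';
  return 0;
}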

View File

@@ -57,6 +57,7 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt16) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt32) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt32)) {
@@ -145,6 +146,13 @@ TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
break;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
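
(Side note, not part of the diff: the new kTfLiteInt16 case requantizes int8 input to int16 output by rescaling with input_scale / output_scale and re-centering on the output zero point; the kernel does this with requantize_output_multiplier and requantize_output_shift. A floating-point reference of the same conversion, with assumed quantization parameters:)

// Illustrative sketch only, not part of the diff.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int16_t RequantizeInt8ToInt16(int8_t q_in, float s_in, int32_t zp_in,
                              float s_out, int32_t zp_out) {
  // Recover the real value, then quantize it with the output parameters.
  const float real = s_in * (static_cast<int32_t>(q_in) - zp_in);
  const int32_t q = static_cast<int32_t>(std::lround(real / s_out)) + zp_out;
  return static_cast<int16_t>(
      std::min<int32_t>(32767, std::max<int32_t>(-32768, q)));
}

int main() {
  // int8 tensor with scale 0.5 and zero point -10, requantized to an int16
  // tensor with scale 1/256 and zero point 0: (22 + 10) * 0.5 * 256 = 4096.
  std::cout << RequantizeInt8ToInt16(22, 0.5f, -10, 1.0f / 256, 0) << '\n';
  return 0;
}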

View File

@@ -103,14 +103,15 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
}
int output_size = NumElements(output);
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
input->type == kTfLiteInt16) {
context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
&op_data->temp_buffer_idx);
op_data->input_zp = input->params.zero_point;
@@ -213,6 +214,43 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
temp_buffer, false));
}
} break;
case kTfLiteInt16: {
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims && special_case_4d_axes_1_and_2) {
reference_integer_ops::Mean(
op_params, op_data->multiplier, op_data->shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input), op_data->input_zp,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output), op_data->output_zp);
} else if (op_data->input_zp == op_data->output_zp &&
op_data->input_scale == op_data->output_scale) {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::Mean(tflite::micro::GetTensorData<int16_t>(input),
input->dims->data, input->dims->size,
tflite::micro::GetTensorData<int16_t>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index,
resolved_axis, temp_buffer));
} else {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::QuantizedMeanOrSum(
tflite::micro::GetTensorData<int16_t>(input), op_data->input_zp,
op_data->input_scale, input->dims->data, input->dims->size,
tflite::micro::GetTensorData<int16_t>(output),
op_data->output_zp, op_data->output_scale, output->dims->data,
output->dims->size, tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index, resolved_axis,
temp_buffer, false));
}
} break;
case kTfLiteUInt8: {
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (params->keep_dims && special_case_4d_axes_1_and_2) {
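
(Side note, not part of the diff: the added kTfLiteInt16 mean path accumulates into the int32 scratch buffer and, when input and output quantization differ, rescales the average in the spirit of QuantizedMeanOrSum. A simplified reference with an int64 accumulator and float math, purely for illustration:)

// Illustrative sketch only, not part of the diff.
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

int16_t QuantizedMeanReference(const std::vector<int16_t>& values, float s_in,
                               int32_t zp_in, float s_out, int32_t zp_out) {
  int64_t sum = 0;  // the kernel accumulates in its int32 scratch buffer
  for (int16_t v : values) sum += v;
  const double mean_real =
      s_in * (static_cast<double>(sum) / values.size() - zp_in);
  return static_cast<int16_t>(std::lround(mean_real / s_out) + zp_out);
}

int main() {
  // Same scale and zero point on both sides (assumed), so the mean is 250.
  const std::vector<int16_t> v = {100, 200, 300, 400};
  std::cout << QuantizedMeanReference(v, 0.01f, 0, 0.01f, 0) << '\n';
  return 0;
}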

View File

@@ -0,0 +1,116 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kSizeTensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
output->type = input->type;
TF_LITE_ENSURE_MSG(context, IsConstantTensor(size),
"Non constant size tensor not supported");
// Ensure params are valid.
auto* params =
reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
if (params->half_pixel_centers && params->align_corners) {
TF_LITE_KERNEL_LOG(
context, "If half_pixel_centers is True, align_corners must be False.");
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* size =
tflite::micro::GetEvalInput(context, node, kSizeTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
tflite::ResizeBilinearParams op_params;
op_params.align_corners = params->align_corners;
op_params.half_pixel_centers = params->half_pixel_centers;
reference_ops::ResizeBilinear(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else if (output->type == kTfLiteInt8) {
tflite::ResizeBilinearParams op_params;
op_params.align_corners = params->align_corners;
op_params.half_pixel_centers = params->half_pixel_centers;
reference_ops::ResizeBilinearInteger(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(size),
tflite::micro::GetTensorData<int32_t>(size),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
TF_LITE_KERNEL_LOG(context, "Output type is %d, requires float or int8.",
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_RESIZE_BILINEAR() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite
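
(Side note, not part of the diff: Prepare rejects half_pixel_centers together with align_corners because each flag implies a different mapping from output to input coordinates. A small sketch of the usual coordinate math behind bilinear resizing, for illustration only:)

// Illustrative sketch only, not part of the diff.
#include <iostream>

// Maps an output pixel index to its fractional source coordinate; the kernel
// then interpolates between the two nearest input pixels.
float SourceCoordinate(int x_out, int in_size, int out_size,
                       bool align_corners, bool half_pixel_centers) {
  const float scale = (align_corners && out_size > 1)
                          ? static_cast<float>(in_size - 1) / (out_size - 1)
                          : static_cast<float>(in_size) / out_size;
  return half_pixel_centers ? (x_out + 0.5f) * scale - 0.5f : x_out * scale;
}

int main() {
  // Upscaling 4 -> 8, output pixel 3 (values chosen for illustration).
  std::cout << "default:            " << SourceCoordinate(3, 4, 8, false, false) << '\n';
  std::cout << "align_corners:      " << SourceCoordinate(3, 4, 8, true, false) << '\n';
  std::cout << "half_pixel_centers: " << SourceCoordinate(3, 4, 8, false, true) << '\n';
  return 0;
}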

Some files were not shown because too many files have changed in this diff.