mirror of
https://github.com/jomjol/AI-on-the-edge-device.git
synced 2025-12-10 13:36:54 +03:00
v11.2.0
This commit is contained in:
@@ -15,6 +15,7 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
|
||||
|
||||
#include <algorithm>
|
||||
#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
|
||||
#ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
|
||||
#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
|
||||
|
||||
@@ -86,6 +86,16 @@ using int32 = std::int32_t;
|
||||
using uint32 = std::uint32_t;
|
||||
#endif // !defined(TF_LITE_STATIC_MEMORY)
|
||||
|
||||
// Allow for cross-compiler usage of function signatures - currently used for
|
||||
// specifying named RUY profiler regions in templated methods.
|
||||
#if defined(_MSC_VER)
|
||||
#define TFLITE_PRETTY_FUNCTION __FUNCSIG__
|
||||
#elif defined(__GNUC__)
|
||||
#define TFLITE_PRETTY_FUNCTION __PRETTY_FUNCTION__
|
||||
#else
|
||||
#define TFLITE_PRETTY_FUNCTION __func__
|
||||
#endif
|
||||
|
||||
// TFLITE_DEPRECATED()
|
||||
//
|
||||
// Duplicated from absl/base/macros.h to avoid pulling in that library.
|
||||
|
||||
@@ -324,7 +324,7 @@ void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
|
||||
// - n_input: the size for input and output.
|
||||
// - output: the 16 bit output
|
||||
// The input is in Qm.15-m format and the output is in Q0.15 format.
|
||||
void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
|
||||
void ApplyTanh(int32_t intger_bits, const int16_t* input, int32_t n_batch,
|
||||
int32_t n_input, int16_t* output);
|
||||
|
||||
// Apply Tanh to a quantized vector. Tbe internal calculation is in float.
|
||||
|
||||
@@ -15,6 +15,7 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
|
||||
@@ -16,6 +16,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
|
||||
@@ -0,0 +1,247 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace reference_ops {
|
||||
|
||||
template <typename T>
|
||||
inline void DivCheckArithmeticParams(const ArithmeticParams& params) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
// Input offset is negative input zero point. Activation tensors are
|
||||
// asymmetric quantized so they span the full int8 range.
|
||||
constexpr int32_t max_value =
|
||||
static_cast<int32_t>(std::numeric_limits<T>::max());
|
||||
TFLITE_DCHECK_GE(params.input1_offset, -max_value);
|
||||
TFLITE_DCHECK_LE(params.input1_offset, max_value);
|
||||
TFLITE_DCHECK_GE(params.input2_offset, -max_value);
|
||||
TFLITE_DCHECK_LE(params.input2_offset, max_value);
|
||||
TFLITE_DCHECK_GE(params.output_offset, -max_value);
|
||||
TFLITE_DCHECK_LE(params.output_offset, max_value);
|
||||
}
|
||||
|
||||
// Element-wise div that can often be used for inner loop of broadcast Div as
|
||||
// well as the non-broadcast Div.
|
||||
template <typename T>
|
||||
inline void DivElementwise(int size, const ArithmeticParams& params,
|
||||
const T* input1_data, const T* input2_data,
|
||||
T* output_data) {
|
||||
DivCheckArithmeticParams<T>(params);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
int32_t input1_val = params.input1_offset + input1_data[i];
|
||||
int32_t input2_val = params.input2_offset + input2_data[i];
|
||||
TFLITE_DCHECK_NE(input2_val, 0);
|
||||
if (input2_val < 0) {
|
||||
// Invert signs to avoid a negative input2_val as input2_inv needs to be
|
||||
// positive to be used as multiplier of MultiplyByQuantizedMultiplier.
|
||||
input1_val = -input1_val;
|
||||
input2_val = -input2_val;
|
||||
}
|
||||
int recip_shift;
|
||||
const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
|
||||
const int headroom = CountLeadingSignBits(input1_val);
|
||||
const int32_t unscaled_quotient =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
|
||||
headroom);
|
||||
const int total_shift = params.output_shift - recip_shift - headroom;
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
unscaled_quotient, params.output_multiplier, total_shift);
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[i] = static_cast<T>(clamped_output);
|
||||
}
|
||||
}
|
||||
|
||||
inline void Div(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8_t* input2_data,
|
||||
const RuntimeShape& output_shape, uint8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
|
||||
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
|
||||
|
||||
inline void Div(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const int8_t* input2_data,
|
||||
const RuntimeShape& output_shape, int8_t* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
|
||||
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
|
||||
|
||||
template <typename T, int N = 5>
|
||||
inline void BroadcastDivSlowQuantized(
|
||||
const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape,
|
||||
const T* input1_data, const RuntimeShape& unextended_input2_shape,
|
||||
const T* input2_data, const RuntimeShape& unextended_output_shape,
|
||||
T* output_data) {
|
||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
|
||||
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
||||
unextended_input2_shape, &desc1, &desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
|
||||
&output_desc);
|
||||
|
||||
DivCheckArithmeticParams<T>(params);
|
||||
|
||||
auto div_func = [&](int indexes[N]) {
|
||||
int32_t input1_val =
|
||||
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
|
||||
int32_t input2_val =
|
||||
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
|
||||
TFLITE_DCHECK_NE(input2_val, 0);
|
||||
if (input2_val < 0) {
|
||||
// Invert signs to avoid a negative input2_val as input2_inv needs to be
|
||||
// positive to be used as multiplier of MultiplyByQuantizedMultiplier.
|
||||
input1_val = -input1_val;
|
||||
input2_val = -input2_val;
|
||||
}
|
||||
int recip_shift;
|
||||
const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
|
||||
const int headroom = CountLeadingSignBits(input1_val);
|
||||
const int32_t unscaled_quotient =
|
||||
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
|
||||
headroom);
|
||||
const int total_shift = params.output_shift - recip_shift - headroom;
|
||||
const int32_t unclamped_result =
|
||||
params.output_offset +
|
||||
MultiplyByQuantizedMultiplierSmallerThanOneExp(
|
||||
unscaled_quotient, params.output_multiplier, total_shift);
|
||||
const int32_t clamped_output =
|
||||
std::min(params.quantized_activation_max,
|
||||
std::max(params.quantized_activation_min, unclamped_result));
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
static_cast<T>(clamped_output);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, div_func);
|
||||
}
|
||||
|
||||
template <int N = 5>
|
||||
inline void BroadcastDivSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& unextended_input1_shape,
|
||||
const uint8_t* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape,
|
||||
const uint8_t* input2_data,
|
||||
const RuntimeShape& unextended_output_shape,
|
||||
uint8_t* output_data) {
|
||||
BroadcastDivSlowQuantized<uint8_t, N>(
|
||||
params, unextended_input1_shape, input1_data, unextended_input2_shape,
|
||||
input2_data, unextended_output_shape, output_data);
|
||||
}
|
||||
|
||||
template <int N = 5>
|
||||
inline void BroadcastDivSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& unextended_input1_shape,
|
||||
const int8_t* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape,
|
||||
const int8_t* input2_data,
|
||||
const RuntimeShape& unextended_output_shape,
|
||||
int8_t* output_data) {
|
||||
BroadcastDivSlowQuantized<int8_t, N>(
|
||||
params, unextended_input1_shape, input1_data, unextended_input2_shape,
|
||||
input2_data, unextended_output_shape, output_data);
|
||||
}
|
||||
|
||||
// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
|
||||
// dimensionality if the runtime code does a single loop over one dimension
|
||||
// that handles broadcasting as the base case. The code generator would then
|
||||
// generate max(D1, D2) nested for loops.
|
||||
template <typename T, int N = 5>
|
||||
void BroadcastDivSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& unextended_input1_shape,
|
||||
const T* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape,
|
||||
const T* input2_data,
|
||||
const RuntimeShape& unextended_output_shape,
|
||||
T* output_data) {
|
||||
T output_activation_min;
|
||||
T output_activation_max;
|
||||
GetActivationParams(params, &output_activation_min, &output_activation_max);
|
||||
|
||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
|
||||
|
||||
NdArrayDesc<N> desc1;
|
||||
NdArrayDesc<N> desc2;
|
||||
NdArrayDesc<N> output_desc;
|
||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
||||
unextended_input2_shape, &desc1, &desc2);
|
||||
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
|
||||
&output_desc);
|
||||
|
||||
// In Tensorflow, the dimensions are canonically named (batch_number, row,
|
||||
// col, channel), with extents (batches, height, width, depth), with the
|
||||
// trailing dimension changing most rapidly (channels has the smallest
|
||||
// stride, typically 1 element).
|
||||
//
|
||||
// In generated C code, we store arrays with the dimensions reversed. The
|
||||
// first dimension has smallest stride.
|
||||
|
||||
auto div_func = [&](int indexes[N]) {
|
||||
output_data[SubscriptToIndex(output_desc, indexes)] =
|
||||
ActivationFunctionWithMinMax(
|
||||
input1_data[SubscriptToIndex(desc1, indexes)] /
|
||||
input2_data[SubscriptToIndex(desc2, indexes)],
|
||||
output_activation_min, output_activation_max);
|
||||
};
|
||||
NDOpsHelper<N>(output_desc, div_func);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void Div(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const T* input1_data,
|
||||
const RuntimeShape& input2_shape, const T* input2_data,
|
||||
const RuntimeShape& output_shape, T* output_data) {
|
||||
T output_activation_min;
|
||||
T output_activation_max;
|
||||
GetActivationParams(params, &output_activation_min, &output_activation_max);
|
||||
|
||||
const int flat_size =
|
||||
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = ActivationFunctionWithMinMax(
|
||||
input1_data[i] / input2_data[i], output_activation_min,
|
||||
output_activation_max);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
@@ -23,9 +25,9 @@ namespace tflite {
|
||||
namespace reference_ops {
|
||||
|
||||
inline int16_t SaturatingLeftShift(int16_t value, int amount) {
|
||||
int32_t result = static_cast<int32_t>(value) * (1 << amount);
|
||||
result = std::min<int32_t>(result, std::numeric_limits<int16_t>::max());
|
||||
result = std::max<int32_t>(result, std::numeric_limits<int16_t>::min());
|
||||
int64_t result = static_cast<int64_t>(value) * (1 << amount);
|
||||
result = std::min<int64_t>(result, std::numeric_limits<int16_t>::max());
|
||||
result = std::max<int64_t>(result, std::numeric_limits<int16_t>::min());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,11 +15,101 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace reference_integer_ops {
|
||||
|
||||
// For per-channel functions, since it is defined in quantization spec that
|
||||
// weights are symmetric
|
||||
// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric),
|
||||
// zero_point (params.weights_offset) is always 0.
|
||||
// However, for per-tensor functions, params.weights_offset is still applied for
|
||||
// backward compatibility.
|
||||
|
||||
inline void FullyConnectedPerChannel(
|
||||
const FullyConnectedParams& params, const int32_t* output_multiplier,
|
||||
const int* output_shift, const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const int32_t* bias_data, const RuntimeShape& output_shape,
|
||||
int8_t* output_data) {
|
||||
const int32_t input_offset = params.input_offset;
|
||||
const int32_t output_offset = params.output_offset;
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
||||
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
|
||||
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
||||
const int batches = output_shape.Dims(0);
|
||||
const int output_depth = output_shape.Dims(1);
|
||||
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
|
||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
int32_t acc = 0;
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int32_t input_val = input_data[b * accum_depth + d];
|
||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
||||
acc += filter_val * (input_val + input_offset);
|
||||
}
|
||||
if (bias_data) {
|
||||
acc += bias_data[out_c];
|
||||
}
|
||||
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_c],
|
||||
output_shift[out_c]);
|
||||
acc += output_offset;
|
||||
acc = std::max(acc, output_activation_min);
|
||||
acc = std::min(acc, output_activation_max);
|
||||
output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AccumScalar>
|
||||
inline void FullyConnectedPerChannel(
|
||||
const FullyConnectedParams& params, const int32_t* output_multiplier,
|
||||
const int* output_shift, const RuntimeShape& input_shape,
|
||||
const int16_t* input_data, const RuntimeShape& filter_shape,
|
||||
const int8_t* filter_data, const RuntimeShape& bias_shape,
|
||||
const AccumScalar* bias_data, const RuntimeShape& output_shape,
|
||||
int16_t* output_data) {
|
||||
const int32_t output_activation_min = params.quantized_activation_min;
|
||||
const int32_t output_activation_max = params.quantized_activation_max;
|
||||
TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
|
||||
TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
|
||||
|
||||
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
|
||||
const int filter_dim_count = filter_shape.DimensionsCount();
|
||||
const int output_dim_count = output_shape.DimensionsCount();
|
||||
const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
|
||||
const int output_depth = output_shape.Dims(output_dim_count - 1);
|
||||
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
|
||||
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
|
||||
for (int b = 0; b < batches; ++b) {
|
||||
for (int out_c = 0; out_c < output_depth; ++out_c) {
|
||||
AccumScalar acc = 0;
|
||||
for (int d = 0; d < accum_depth; ++d) {
|
||||
int32_t input_val = input_data[b * accum_depth + d];
|
||||
int32_t filter_val = filter_data[out_c * accum_depth + d];
|
||||
acc += filter_val * input_val;
|
||||
}
|
||||
if (bias_data) {
|
||||
acc += bias_data[out_c];
|
||||
}
|
||||
int32_t acc_scaled = MultiplyByQuantizedMultiplier(
|
||||
acc, output_multiplier[out_c], output_shift[out_c]);
|
||||
acc_scaled = std::max(acc_scaled, output_activation_min);
|
||||
acc_scaled = std::min(acc_scaled, output_activation_max);
|
||||
output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void FullyConnected(
|
||||
const FullyConnectedParams& params, const RuntimeShape& input_shape,
|
||||
const int8_t* input_data, const RuntimeShape& filter_shape,
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,7 +15,9 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
@@ -15,7 +15,9 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,6 +15,7 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
@@ -15,6 +15,7 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/cppmath.h"
|
||||
|
||||
@@ -15,6 +15,7 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "fixedpoint/fixedpoint.h"
|
||||
|
||||
@@ -15,6 +15,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
|
||||
@@ -27,6 +27,11 @@ class RuntimeShape {
|
||||
public:
|
||||
RuntimeShape& operator=(RuntimeShape const&) = delete;
|
||||
|
||||
// RuntimeShape in TFLM supports up to 5 dimensions.
|
||||
// The name kMaxSmallSize comes from the same file of the upstream
|
||||
// tensorflow lite repo and need to be kept the same for max reuse.
|
||||
static constexpr int kMaxSmallSize = 5;
|
||||
|
||||
RuntimeShape() : size_(0) {}
|
||||
|
||||
explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {}
|
||||
@@ -104,11 +109,9 @@ class RuntimeShape {
|
||||
sizeof(int32_t) * shape.DimensionsCount());
|
||||
}
|
||||
|
||||
// A maximum of 4 dimensions are supported on TFLM.
|
||||
static constexpr int kMaxSize = 5;
|
||||
int32_t size_;
|
||||
union {
|
||||
int32_t dims_[kMaxSize];
|
||||
int32_t dims_[kMaxSmallSize];
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -974,11 +974,11 @@ struct StridedSliceParams {
|
||||
int8_t strides_count;
|
||||
int32_t strides[5];
|
||||
|
||||
int16_t begin_mask;
|
||||
int16_t ellipsis_mask;
|
||||
int16_t end_mask;
|
||||
int16_t new_axis_mask;
|
||||
int16_t shrink_axis_mask;
|
||||
uint16_t begin_mask;
|
||||
uint16_t ellipsis_mask;
|
||||
uint16_t end_mask;
|
||||
uint16_t new_axis_mask;
|
||||
uint16_t shrink_axis_mask;
|
||||
};
|
||||
|
||||
struct TanhParams {
|
||||
|
||||
@@ -177,6 +177,14 @@ inline int64_t NumElements(const TfLiteTensor* t) {
|
||||
return NumElements(t->dims);
|
||||
}
|
||||
|
||||
inline int64_t NumElements(const int* dims, int num_dims) {
|
||||
int64_t count = 1;
|
||||
for (int i = 0; i < num_dims; ++i) {
|
||||
count *= dims[i];
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// Determines whether tensor is constant.
|
||||
// TODO(b/138199592): Introduce new query which checks for constant OR
|
||||
// persistent-read-only, which would be useful for most tensor kernels that
|
||||
@@ -308,7 +316,7 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
|
||||
const TfLiteTensor* input3,
|
||||
TfLiteIntArray** output_shape);
|
||||
|
||||
// Return the size of given type in bytes. Return 0 in in case of string.
|
||||
// Return the size of given type in bytes. Return 0 in case of string.
|
||||
int TfLiteTypeGetSize(TfLiteType type);
|
||||
|
||||
// Whether the current platform is mobile (Android or iOS).
|
||||
|
||||
Reference in New Issue
Block a user