Mirror of https://github.com/jomjol/AI-on-the-edge-device.git (synced 2025-12-11 05:56:57 +03:00).
Commit: "Rolling 20220924". This commit is contained in:
@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@@ -165,4 +165,4 @@ inline void HardSwish(const HardSwishParams& params,
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
|
||||
|
||||
@@ -16,6 +16,7 @@ limitations under the License.
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <complex>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
|
||||
@@ -61,6 +62,20 @@ inline void Mul(const ArithmeticParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
inline void Mul(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const std::complex<float>* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const std::complex<float>* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
std::complex<float>* output_data) {
|
||||
const int flat_size =
|
||||
MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape);
|
||||
for (int i = 0; i < flat_size; ++i) {
|
||||
output_data[i] = input1_data[i] * input2_data[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline void Mul(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const uint8_t* input1_data,
|
||||
const RuntimeShape& input2_shape, const uint8_t* input2_data,
|
||||
@@ -162,6 +177,37 @@ void BroadcastMul4DSlow(const ArithmeticParams& params,
|
||||
}
|
||||
}
|
||||
|
||||
inline void BroadcastMul4DSlow(const ArithmeticParams& params,
|
||||
const RuntimeShape& unextended_input1_shape,
|
||||
const std::complex<float>* input1_data,
|
||||
const RuntimeShape& unextended_input2_shape,
|
||||
const std::complex<float>* input2_data,
|
||||
const RuntimeShape& unextended_output_shape,
|
||||
std::complex<float>* output_data) {
|
||||
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
|
||||
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
|
||||
const RuntimeShape output_shape =
|
||||
RuntimeShape::ExtendedShape(4, unextended_output_shape);
|
||||
|
||||
NdArrayDesc<4> desc1;
|
||||
NdArrayDesc<4> desc2;
|
||||
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
|
||||
unextended_input2_shape, &desc1, &desc2);
|
||||
|
||||
for (int b = 0; b < output_shape.Dims(0); ++b) {
|
||||
for (int y = 0; y < output_shape.Dims(1); ++y) {
|
||||
for (int x = 0; x < output_shape.Dims(2); ++x) {
|
||||
for (int c = 0; c < output_shape.Dims(3); ++c) {
|
||||
output_data[Offset(output_shape, b, y, x, c)] =
|
||||
input1_data[SubscriptToIndex(desc1, b, y, x, c)] *
|
||||
input2_data[SubscriptToIndex(desc2, b, y, x, c)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
|
||||
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "ruy/profiler/instrumentation.h" // from @ruy
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/types.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace reference_ops {
|
||||
|
||||
template <typename D, typename T>
|
||||
void Select(const RuntimeShape& input_condition_shape,
|
||||
const D* input_condition_data, const RuntimeShape& input_x_shape,
|
||||
const T* input_x_data, const RuntimeShape& input_y_shape,
|
||||
const T* input_y_data, const RuntimeShape& output_shape,
|
||||
T* output_data) {
|
||||
ruy::profiler::ScopeLabel label("Select");
|
||||
int64_t flatsize;
|
||||
// Allow select operator executions on mixed scalar tensors and one element
|
||||
// tensors.
|
||||
if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
|
||||
input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1) {
|
||||
flatsize = 1;
|
||||
} else {
|
||||
flatsize = MatchingFlatSize(input_condition_shape, input_x_shape,
|
||||
input_y_shape, output_shape);
|
||||
}
|
||||
for (int64_t i = 0; i < flatsize; ++i) {
|
||||
output_data[i] =
|
||||
input_condition_data[i] ? input_x_data[i] : input_y_data[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename D, typename T>
|
||||
void RankOneSelect(const RuntimeShape& input_condition_shape,
|
||||
const D* input_condition_data,
|
||||
const RuntimeShape& input_x_shape, const T* input_x_data,
|
||||
const RuntimeShape& input_y_shape, const T* input_y_data,
|
||||
const RuntimeShape& output_shape, T* output_data) {
|
||||
ruy::profiler::ScopeLabel label("Select/RankOneSelect");
|
||||
const int64_t outer_size = input_condition_shape.FlatSize();
|
||||
int64_t inner_size;
|
||||
if (input_condition_shape.DimensionsCount() == 0) {
|
||||
inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
|
||||
} else {
|
||||
TFLITE_DCHECK_EQ(
|
||||
MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0),
|
||||
outer_size);
|
||||
inner_size =
|
||||
MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
|
||||
}
|
||||
|
||||
int64_t offset = 0;
|
||||
for (int64_t i = 0; i < outer_size; i++) {
|
||||
const T* input_data = input_condition_data[i] ? input_x_data : input_y_data;
|
||||
memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
|
||||
offset += inner_size;
|
||||
}
|
||||
}
|
||||
|
||||
// Broadcasting select over shapes of rank <= 5 (slow reference path):
// output[coord] = condition[bcast(coord)] ? x[bcast(coord)] : y[bcast(coord)].
// Indices into condition/x/y are accumulated manually from the broadcast
// descriptors' strides, while the (dense) output index is built from the
// output extents — so broadcast inputs revisit elements as needed.
template <typename D, typename T>
void BroadcastSelect5DSlow(const RuntimeShape& input_condition_shape,
                           const D* input_condition_data,
                           const RuntimeShape& input_x_shape,
                           const T* input_x_data,
                           const RuntimeShape& input_y_shape,
                           const T* input_y_data,
                           const RuntimeShape& output_shape, T* output_data) {
  ruy::profiler::ScopeLabel label("Select/BroadcastSelectSlow");
  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 5);
  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 5);

  NdArrayDesc<5> desc_condition;
  NdArrayDesc<5> desc_x;
  NdArrayDesc<5> desc_y;
  NdArrayDesc<5> desc_output;
  // Pad the output shape to rank 5 so the five loops below line up with the
  // broadcast descriptors.
  const RuntimeShape extended_output_shape =
      RuntimeShape::ExtendedShape(5, output_shape);
  CopyDimsToDesc(extended_output_shape, &desc_output);
  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape,
                                      input_y_shape, &desc_condition, &desc_x,
                                      &desc_y);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest
  // stride, typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for
  // the best cache behavior.
  for (int n = 0; n < desc_output.extents[0]; ++n) {
    // Output index is built Horner-style: each level multiplies the running
    // index by the next extent before adding the loop counter.
    int out_idx_n = desc_output.extents[1] * n;
    int cond_idx_n = desc_condition.strides[0] * n;
    int in_idx1_n = desc_x.strides[0] * n;
    int in_idx2_n = desc_y.strides[0] * n;
    for (int b = 0; b < desc_output.extents[1]; ++b) {
      int out_idx_b = (out_idx_n + b) * desc_output.extents[2];
      int cond_idx_b = cond_idx_n + desc_condition.strides[1] * b;
      int in_idx1_b = in_idx1_n + desc_x.strides[1] * b;
      int in_idx2_b = in_idx2_n + desc_y.strides[1] * b;
      for (int y = 0; y < desc_output.extents[2]; ++y) {
        int out_idx_y = (out_idx_b + y) * desc_output.extents[3];
        int cond_idx_y = cond_idx_b + desc_condition.strides[2] * y;
        int in_idx1_y = in_idx1_b + desc_x.strides[2] * y;
        int in_idx2_y = in_idx2_b + desc_y.strides[2] * y;
        for (int x = 0; x < desc_output.extents[3]; ++x) {
          int out_idx = (out_idx_y + x) * desc_output.extents[4];
          int cond_idx = cond_idx_y + desc_condition.strides[3] * x;
          int in_idx1 = in_idx1_y + desc_x.strides[3] * x;
          int in_idx2 = in_idx2_y + desc_y.strides[3] * x;
          // Innermost dimension: advance by per-tensor stride each step
          // (a broadcast dimension has stride 0, repeating the element).
          for (int c = 0; c < desc_output.extents[4]; ++c) {
            output_data[out_idx] = input_condition_data[cond_idx]
                                       ? input_x_data[in_idx1]
                                       : input_y_data[in_idx2];
            out_idx++;
            cond_idx += desc_condition.strides[4];
            in_idx1 += desc_x.strides[4];
            in_idx2 += desc_y.strides[4];
          }
        }
      }
    }
  }
}
|
||||
|
||||
} // namespace reference_ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
|
||||
Reference in New Issue
Block a user