removed tflite-lib

This commit is contained in:
CaCO3
2022-09-23 21:29:17 +02:00
parent e713ffa52d
commit 75a653a5c7
283 changed files with 0 additions and 43131 deletions

View File

@@ -1,38 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
namespace tflite {
// The magic number in the template parameter is the maximum number of ops that
// can be added to AllOpsResolver. It can be increased if needed. Most
// applications that care about memory footprint will want to use
// MicroMutableOpResolver directly with an application-specific template
// parameter. The examples directory has sample code for this.
class AllOpsResolver : public MicroMutableOpResolver<128> {
public:
AllOpsResolver();
private:
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
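
For reference, the size-conscious alternative described in the comment above looks roughly as follows. A minimal sketch, assuming a model that needs only these four built-in ops (the op selection is illustrative):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Register only the ops the model actually uses; the template parameter
// (4 here) is the maximum op count, so nothing is reserved for unused kernels.
static tflite::MicroMutableOpResolver<4> op_resolver;

void RegisterOps() {
  op_resolver.AddConv2D();
  op_resolver.AddMaxPool2D();
  op_resolver.AddFullyConnected();
  op_resolver.AddSoftmax();
}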

View File

@@ -1,100 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/c_api_types.h"
namespace tflite {
// Interface classes that the TFLM framework relies on to get the buffers it
// needs. There are two types of buffers that the TFLM framework requires:
// persistent and non-persistent. Persistent buffers, once allocated, are never
// freed by the TFLM framework. Non-persistent buffers can be allocated and
// deallocated by the TFLM framework. This file defines the two interface
// classes that the TFLM framework relies on to manage these buffers.
// Interface class for managing persistent buffers.
class IPersistentBufferAllocator {
public:
IPersistentBufferAllocator() {}
virtual ~IPersistentBufferAllocator() {}
// Allocates persistent memory. The persistent buffer is never freed.
virtual uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) = 0;
// Returns the size of all persistent allocations in bytes.
virtual size_t GetPersistentUsedBytes() const = 0;
};
// Interface class for managing non-persistent buffers.
// The default non-persistent buffers are temp buffers that are not resizable.
// Support of at least one resizable buffer is required.
class INonPersistentBufferAllocator {
public:
INonPersistentBufferAllocator() {}
virtual ~INonPersistentBufferAllocator() {}
// Allocates a temporary buffer. This buffer is not resizable.
virtual uint8_t* AllocateTemp(size_t size, size_t alignment) = 0;
// Signals that a temporary buffer is no longer needed.
virtual void DeallocateTemp(uint8_t* buf) = 0;
// Returns true if all temporary buffers are already deallocated.
virtual bool IsAllTempDeallocated() = 0;
// Signals that all temporary allocations can be reclaimed. TFLM calls this
// API when it knows that all temporary buffers that it requested have been
// deallocated. The goal of this API is to allow implementations of
// INonPersistentBufferAllocator to reuse the buffer with reasonable
// complexity.
virtual TfLiteStatus ResetTempAllocations() = 0;
// Returns a buffer that is resizable via ResizeBuffer().
virtual uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) = 0;
// Resizes a buffer previously returned by AllocateResizableBuffer().
virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
size_t alignment) = 0;
// Frees up the memory occupied by the resizable buffer.
virtual TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) = 0;
// Returns a pointer pointing to the start of the overlay memory, which is
// used for activation tensors and scratch buffers by kernels at Invoke stage.
virtual uint8_t* GetOverlayMemoryAddress() const = 0;
// Reserves the size of the overlay memory. This overlay is reserved for the
// kernels at the Invoke stage. It is referred to as the overlay because,
// before the Invoke stage, the same memory can be used for temp buffers. The
// layout of the memory is planned separately by the memory planner at the
// Invoke stage.
virtual TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
size_t alignment) = 0;
// Returns the size of the non-persistent buffer in use.
virtual size_t GetNonPersistentUsedBytes() const = 0;
// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
virtual size_t GetAvailableMemory(size_t alignment) const = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
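
A minimal sketch of how calling code might drive these two interfaces; the sizes and alignments are illustrative, and the concrete allocator types appear in the files below:

#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"

void UseAllocators(tflite::IPersistentBufferAllocator* persistent,
                   tflite::INonPersistentBufferAllocator* non_persistent) {
  // Persistent allocations live for the lifetime of the framework and are
  // never freed.
  uint8_t* op_data = persistent->AllocatePersistentBuffer(/*size=*/512,
                                                          /*alignment=*/16);
  // Temp allocations must each be deallocated, and then the chain reset,
  // before the resizable/overlay region may be resized.
  uint8_t* scratch = non_persistent->AllocateTemp(/*size=*/64,
                                                  /*alignment=*/16);
  // ... use op_data and scratch ...
  (void)op_data;
  non_persistent->DeallocateTemp(scratch);
  non_persistent->ResetTempAllocations();
}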

View File

@@ -1,170 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
NonPersistentArenaBufferAllocator::NonPersistentArenaBufferAllocator(
uint8_t* buffer, size_t buffer_size)
: buffer_head_(buffer),
buffer_tail_(buffer + buffer_size),
head_temp_(buffer),
next_temp_(buffer) {}
NonPersistentArenaBufferAllocator::~NonPersistentArenaBufferAllocator() {}
// Allocates a temporary buffer. This buffer is not resizable.
uint8_t* NonPersistentArenaBufferAllocator::AllocateTemp(size_t size,
size_t alignment) {
uint8_t* const aligned_result = AlignPointerUp(next_temp_, alignment);
const size_t available_memory = buffer_tail_ - aligned_result;
if (available_memory < size) {
MicroPrintf(
"Failed to allocate temp memory. Requested: %u, "
"available %u, missing: %u",
size, available_memory, size - available_memory);
return nullptr;
}
next_temp_ = aligned_result + size;
temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(aligned_result);
temp_buffer_count_++;
return aligned_result;
}
// Signals that a temporary buffer is no longer needed.
void NonPersistentArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) {
temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(temp_buf);
temp_buffer_count_--;
}
// Returns true if all temporary buffers are already deallocated.
bool NonPersistentArenaBufferAllocator::IsAllTempDeallocated() {
if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) {
MicroPrintf(
"Number of allocated temp buffers: %d. Checksum passing status: %d",
temp_buffer_count_, !temp_buffer_ptr_check_sum_);
return false;
}
return true;
}
// Signals that all temporary allocations can be reclaimed. TFLM calls this
// API when it knows that all temporary buffers that it requested have been
// deallocated. The goal of this API is to allow implementations of
// INonPersistentBufferAllocator to reuse the buffer with reasonable
// complexity.
TfLiteStatus NonPersistentArenaBufferAllocator::ResetTempAllocations() {
if (!IsAllTempDeallocated()) {
MicroPrintf(
"All temp buffers must be freed before calling ResetTempAllocations()");
return kTfLiteError;
}
next_temp_ = head_temp_;
return kTfLiteOk;
}
// Returns a buffer that is resizable via ResizeBuffer().
uint8_t* NonPersistentArenaBufferAllocator::AllocateResizableBuffer(
size_t size, size_t alignment) {
// Only supports one resizable buffer, which starts at the buffer head.
uint8_t* expected_resizable_buf = AlignPointerUp(buffer_head_, alignment);
if (resizable_buffer_allocated_) {
MicroPrintf(
"Cannot allocate a new resizable buffer when one is already allocated");
return nullptr;
}
if (ResizeBuffer(expected_resizable_buf, size, alignment) == kTfLiteOk) {
resizable_buffer_allocated_ = true;
return expected_resizable_buf;
}
return nullptr;
}
// Resizes a buffer previously returned by AllocateResizableBuffer().
// Note that ResizeBuffer(old_resizable_buf, 0, 1) effectively deallocates
// a previously allocated resizable buffer.
TfLiteStatus NonPersistentArenaBufferAllocator::ResizeBuffer(
uint8_t* resizable_buf, size_t size, size_t alignment) {
// Only supports one resizable buffer, which starts at the buffer head.
uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
if (resizable_buf != expect_resizable_buf) {
MicroPrintf("Internal error: buffer is not resizable");
return kTfLiteError;
}
if (head_temp_ != next_temp_) {
MicroPrintf("ResetTempAllocations() is not called before ResizeBuffer().");
return kTfLiteError;
}
const size_t available_memory = buffer_tail_ - expect_resizable_buf;
if (available_memory < size) {
MicroPrintf(
"Failed to resize buffer. Requested: %u, available %u, missing: %u",
size, available_memory, size - available_memory);
return kTfLiteError;
}
head_temp_ = expect_resizable_buf + size;
next_temp_ = head_temp_;
return kTfLiteOk;
}
// Frees up the memory occupied by the resizable buffer.
TfLiteStatus NonPersistentArenaBufferAllocator::DeallocateResizableBuffer(
uint8_t* resizable_buf) {
TfLiteStatus status = ResizeBuffer(resizable_buf, 0, 1);
if (status == kTfLiteOk) {
resizable_buffer_allocated_ = false;
}
return status;
}
// Returns a pointer pointing to the start of the overlay memory, which is
// used for activation tensors and scratch buffers by kernels at Invoke stage.
uint8_t* NonPersistentArenaBufferAllocator::GetOverlayMemoryAddress() const {
return buffer_head_;
}
// Reserves the size of the overlay memory. This overlay is reserved for the
// kernels at the Invoke stage. It is referred to as the overlay because,
// before the Invoke stage, the same memory can be used for temp buffers. The
// layout of the memory is planned separately by the memory planner at the
// Invoke stage.
TfLiteStatus
NonPersistentArenaBufferAllocator::ReserveNonPersistentOverlayMemory(
size_t size, size_t alignment) {
uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
return ResizeBuffer(expect_resizable_buf, size, alignment);
}
// Returns the size of the non-persistent buffer in use.
size_t NonPersistentArenaBufferAllocator::GetNonPersistentUsedBytes() const {
return (next_temp_ - buffer_head_);
}
// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
size_t NonPersistentArenaBufferAllocator::GetAvailableMemory(
size_t alignment) const {
uint8_t* const aligned_temp = AlignPointerUp(next_temp_, alignment);
uint8_t* const aligned_tail = AlignPointerDown(buffer_tail_, alignment);
return aligned_tail - aligned_temp;
}
} // namespace tflite
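
A minimal usage sketch for this allocator; the arena size, request sizes, and alignments are illustrative:

#include "tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h"

void NonPersistentExample() {
  constexpr size_t kArenaSize = 1024;
  alignas(16) static uint8_t arena[kArenaSize];
  tflite::NonPersistentArenaBufferAllocator allocator(arena, kArenaSize);

  // Temp buffers are carved from the free region and must all be deallocated
  // and reset before the resizable buffer may be (re)sized.
  uint8_t* temp = allocator.AllocateTemp(/*size=*/128, /*alignment=*/16);
  allocator.DeallocateTemp(temp);
  allocator.ResetTempAllocations();

  // Only one resizable buffer is supported; it sits at the head of the arena.
  uint8_t* resizable =
      allocator.AllocateResizableBuffer(/*size=*/256, /*alignment=*/16);
  allocator.ResizeBuffer(resizable, /*size=*/512, /*alignment=*/16);
  allocator.DeallocateResizableBuffer(resizable);
}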

View File

@@ -1,105 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// Implements INonPersistentBufferAllocator on an arena that is dedicated to
// non-persistent buffers.
class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
public:
NonPersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
virtual ~NonPersistentArenaBufferAllocator();
// Allocates a temporary buffer. This buffer is not resizable.
uint8_t* AllocateTemp(size_t size, size_t alignment) override;
// Signals that a temporary buffer is no longer needed.
void DeallocateTemp(uint8_t* buf) override;
// Returns true if all temporary buffers are already deallocated.
bool IsAllTempDeallocated() override;
// Signals that all temporary allocations can be reclaimed. TFLM calls this
// API when it knows that all temporary buffers that it requested have been
// deallocated.
TfLiteStatus ResetTempAllocations() override;
// Returns a buffer that is resizable via ResizeBuffer().
uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) override;
// Resizes a buffer previously returned by AllocateResizableBuffer().
TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
size_t alignment) override;
// Frees up the memory occupied by the resizable buffer.
TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) override;
// Returns a pointer pointing to the start of the overlay memory, which is
// used for activation tensors and scratch buffers by kernels at Invoke stage.
uint8_t* GetOverlayMemoryAddress() const override;
// Reserves the size of the overlay memory. This overlay is reserved for the
// kernels at the Invoke stage. It is referred to as the overlay because,
// before the Invoke stage, the same memory can be used for temp buffers. The
// layout of the memory is planned separately by the memory planner at the
// Invoke stage.
TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
size_t alignment) override;
// Returns the size of the non-persistent buffer in use.
size_t GetNonPersistentUsedBytes() const override;
// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
size_t GetAvailableMemory(size_t alignment) const override;
TF_LITE_REMOVE_VIRTUAL_DELETE
private:
// The memory arena that this allocator manages.
uint8_t* const buffer_head_;
uint8_t* const buffer_tail_;
// The whole region is split into two parts:
// buffer_head_ to head_temp_ - 1 belongs to the only resizable buffer.
// head_temp_ to buffer_tail_ can be used for (non-resizable) temp buffers.
uint8_t* head_temp_;
// next_temp_ points to the next available temp buffer allocation address;
// its range is between head_temp_ and buffer_tail_.
uint8_t* next_temp_;
// XOR Check sum for outstanding temp buffers.
// If all temp buffers are deallocated OR no temp buffers are allocated,
// temp_buffer_ptr_check_sum_ == nullptr.
intptr_t temp_buffer_ptr_check_sum_ = 0;
// Count of outstanding temp buffers.
int temp_buffer_count_ = 0;
bool resizable_buffer_allocated_ = false;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_

View File

@@ -1,52 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
PersistentArenaBufferAllocator::PersistentArenaBufferAllocator(
uint8_t* buffer, size_t buffer_size)
: buffer_head_(buffer),
buffer_tail_(buffer + buffer_size),
tail_temp_(buffer_tail_) {}
PersistentArenaBufferAllocator::~PersistentArenaBufferAllocator() {}
uint8_t* PersistentArenaBufferAllocator::AllocatePersistentBuffer(
size_t size, size_t alignment) {
uint8_t* const aligned_result =
AlignPointerDown(tail_temp_ - size, alignment);
if (aligned_result < buffer_head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const size_t missing_memory = buffer_head_ - aligned_result;
MicroPrintf(
"Failed to allocate tail memory. Requested: %u, "
"available %u, missing: %u",
size, size - missing_memory, missing_memory);
#endif
return nullptr;
}
tail_temp_ = aligned_result;
return aligned_result;
}
size_t PersistentArenaBufferAllocator::GetPersistentUsedBytes() const {
return buffer_tail_ - tail_temp_;
}
} // namespace tflite
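
A minimal usage sketch; the arena and request sizes are illustrative:

#include "tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h"

void PersistentExample() {
  constexpr size_t kArenaSize = 512;
  alignas(16) static uint8_t arena[kArenaSize];
  tflite::PersistentArenaBufferAllocator allocator(arena, kArenaSize);

  // Allocations are carved downward from the tail of the arena and are never
  // freed for the lifetime of the allocator.
  uint8_t* op_data = allocator.AllocatePersistentBuffer(/*size=*/64,
                                                        /*alignment=*/16);
  // Usage includes any bytes lost to alignment, so this is >= 64.
  size_t used = allocator.GetPersistentUsedBytes();
  (void)op_data;
  (void)used;
}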

View File

@@ -1,59 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// PersistentArenaBufferAllocator is an implementation of the
// IPersistentBufferAllocator interface on an arena that is dedicated to
// persistent buffers.
class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
public:
PersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
virtual ~PersistentArenaBufferAllocator();
// Allocates persistent memory. The persistent buffer is never freed.
// Returns nullptr if an error occurred.
uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;
// Returns the size of all persistent allocations in bytes.
size_t GetPersistentUsedBytes() const override;
TF_LITE_REMOVE_VIRTUAL_DELETE
private:
// The memory arena that this allocator manages.
uint8_t* const buffer_head_;
uint8_t* const buffer_tail_;
// The whole region is split into two parts:
// tail_temp_ to buffer_tail_ contains allocated buffers;
// buffer_head_ to tail_temp_ - 1 is still-available space.
// In essence, the allocated region grows downward from the tail and emulates
// SingleArenaBufferAllocator's persistent part.
uint8_t* tail_temp_;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_

View File

@@ -1,87 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h"
#include <new>
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
RecordingSingleArenaBufferAllocator::RecordingSingleArenaBufferAllocator(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size)
: SingleArenaBufferAllocator(error_reporter, buffer_head, buffer_size),
requested_head_bytes_(0),
requested_tail_bytes_(0),
used_bytes_(0),
alloc_count_(0) {}
RecordingSingleArenaBufferAllocator::~RecordingSingleArenaBufferAllocator() {}
RecordingSingleArenaBufferAllocator*
RecordingSingleArenaBufferAllocator::Create(ErrorReporter* error_reporter,
uint8_t* buffer_head,
size_t buffer_size) {
TFLITE_DCHECK(error_reporter != nullptr);
TFLITE_DCHECK(buffer_head != nullptr);
RecordingSingleArenaBufferAllocator tmp = RecordingSingleArenaBufferAllocator(
error_reporter, buffer_head, buffer_size);
uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
sizeof(RecordingSingleArenaBufferAllocator),
alignof(RecordingSingleArenaBufferAllocator));
// Use the default copy constructor to populate internal states.
return new (allocator_buffer) RecordingSingleArenaBufferAllocator(tmp);
}
size_t RecordingSingleArenaBufferAllocator::GetRequestedBytes() const {
return requested_head_bytes_ + requested_tail_bytes_;
}
size_t RecordingSingleArenaBufferAllocator::GetUsedBytes() const {
return used_bytes_;
}
size_t RecordingSingleArenaBufferAllocator::GetAllocatedCount() const {
return alloc_count_;
}
TfLiteStatus RecordingSingleArenaBufferAllocator::ResizeBuffer(
uint8_t* resizable_buf, size_t size, size_t alignment) {
const uint8_t* previous_head = head();
TfLiteStatus status =
SingleArenaBufferAllocator::ResizeBuffer(resizable_buf, size, alignment);
if (status == kTfLiteOk) {
used_bytes_ += head() - previous_head;
requested_head_bytes_ = size;
}
return status;
}
uint8_t* RecordingSingleArenaBufferAllocator::AllocatePersistentBuffer(
size_t size, size_t alignment) {
const uint8_t* previous_tail = tail();
uint8_t* result =
SingleArenaBufferAllocator::AllocatePersistentBuffer(size, alignment);
if (result != nullptr) {
used_bytes_ += previous_tail - tail();
requested_tail_bytes_ += size;
alloc_count_++;
}
return result;
}
} // namespace tflite
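
A minimal sketch of recording allocation statistics in a test; the arena size and request size are illustrative:

#include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

void RecordingExample() {
  constexpr size_t kArenaSize = 2048;
  alignas(16) static uint8_t arena[kArenaSize];
  tflite::RecordingSingleArenaBufferAllocator* allocator =
      tflite::RecordingSingleArenaBufferAllocator::Create(
          tflite::GetMicroErrorReporter(), arena, kArenaSize);

  allocator->AllocatePersistentBuffer(/*size=*/100, /*alignment=*/16);
  // Note: the counters also include the allocator's own self-allocation made
  // inside Create(). Used bytes include alignment padding, so the expected
  // relationship is GetUsedBytes() >= GetRequestedBytes().
  size_t requested = allocator->GetRequestedBytes();
  size_t used = allocator->GetUsedBytes();
  size_t count = allocator->GetAllocatedCount();
  (void)requested;
  (void)used;
  (void)count;
}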

View File

@@ -1,64 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// Utility class used to log allocations of a SingleArenaBufferAllocator. Should
// only be used in debug/evaluation settings or unit tests to evaluate
// allocation usage.
class RecordingSingleArenaBufferAllocator : public SingleArenaBufferAllocator {
public:
RecordingSingleArenaBufferAllocator(ErrorReporter* error_reporter,
uint8_t* buffer_head, size_t buffer_size);
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
~RecordingSingleArenaBufferAllocator() override;
static RecordingSingleArenaBufferAllocator* Create(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size);
// Returns the number of bytes requested from the head or tail.
size_t GetRequestedBytes() const;
// Returns the number of bytes actually allocated from the head or tail. This
// value will be >= the number of requested bytes due to padding and
// alignment.
size_t GetUsedBytes() const;
// Returns the number of alloc calls from the head or tail.
size_t GetAllocatedCount() const;
TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
size_t alignment) override;
uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;
private:
size_t requested_head_bytes_;
size_t requested_tail_bytes_;
size_t used_bytes_;
size_t alloc_count_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_

View File

@@ -1,209 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
SingleArenaBufferAllocator::SingleArenaBufferAllocator(
ErrorReporter* error_reporter, uint8_t* buffer_head, uint8_t* buffer_tail)
:
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
error_reporter_(error_reporter),
#endif
buffer_head_(buffer_head),
buffer_tail_(buffer_tail),
head_(buffer_head),
tail_(buffer_tail),
temp_(buffer_head_) {
}
SingleArenaBufferAllocator::SingleArenaBufferAllocator(
ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size)
: SingleArenaBufferAllocator(error_reporter, buffer, buffer + buffer_size) {
}
/* static */
SingleArenaBufferAllocator* SingleArenaBufferAllocator::Create(
ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
TFLITE_DCHECK(error_reporter != nullptr);
TFLITE_DCHECK(buffer_head != nullptr);
SingleArenaBufferAllocator tmp =
SingleArenaBufferAllocator(error_reporter, buffer_head, buffer_size);
// Allocate enough bytes from the buffer to create a
// SingleArenaBufferAllocator. The new instance will use the current adjusted
// tail buffer from the tmp allocator instance.
uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
sizeof(SingleArenaBufferAllocator), alignof(SingleArenaBufferAllocator));
// Use the default copy constructor to populate internal states.
return new (allocator_buffer) SingleArenaBufferAllocator(tmp);
}
SingleArenaBufferAllocator::~SingleArenaBufferAllocator() {}
uint8_t* SingleArenaBufferAllocator::AllocateResizableBuffer(size_t size,
size_t alignment) {
// Only supports one resizable buffer, which starts at the buffer head.
uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
if (ResizeBuffer(expect_resizable_buf, size, alignment) == kTfLiteOk) {
return expect_resizable_buf;
}
return nullptr;
}
TfLiteStatus SingleArenaBufferAllocator::DeallocateResizableBuffer(
uint8_t* resizable_buf) {
return ResizeBuffer(resizable_buf, 0, 1);
}
TfLiteStatus SingleArenaBufferAllocator::ReserveNonPersistentOverlayMemory(
size_t size, size_t alignment) {
uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
return ResizeBuffer(expect_resizable_buf, size, alignment);
}
TfLiteStatus SingleArenaBufferAllocator::ResizeBuffer(uint8_t* resizable_buf,
size_t size,
size_t alignment) {
// Only supports one resizable buffer, which starts at the buffer head.
uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
if (head_ != temp_ || resizable_buf != expect_resizable_buf) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Internal error: either buffer is not resizable or "
"ResetTempAllocations() is not called before ResizeBuffer().");
return kTfLiteError;
}
uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment);
const size_t available_memory = tail_ - aligned_result;
if (available_memory < size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to resize buffer. Requested: %u, available %u, missing: %u",
size, available_memory, size - available_memory);
return kTfLiteError;
}
head_ = aligned_result + size;
temp_ = head_;
return kTfLiteOk;
}
uint8_t* SingleArenaBufferAllocator::AllocatePersistentBuffer(
size_t size, size_t alignment) {
uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
if (aligned_result < head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
const size_t missing_memory = head_ - aligned_result;
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate tail memory. Requested: %u, "
"available %u, missing: %u",
size, size - missing_memory, missing_memory);
#endif
return nullptr;
}
tail_ = aligned_result;
return aligned_result;
}
uint8_t* SingleArenaBufferAllocator::AllocateTemp(size_t size,
size_t alignment) {
uint8_t* const aligned_result = AlignPointerUp(temp_, alignment);
const size_t available_memory = tail_ - aligned_result;
if (available_memory < size) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate temp memory. Requested: %u, "
"available %u, missing: %u",
size, available_memory, size - available_memory);
return nullptr;
}
temp_ = aligned_result + size;
temp_buffer_ptr_check_sum_ ^= (reinterpret_cast<intptr_t>(aligned_result));
temp_buffer_count_++;
return aligned_result;
}
void SingleArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) {
temp_buffer_ptr_check_sum_ ^= (reinterpret_cast<intptr_t>(temp_buf));
temp_buffer_count_--;
}
bool SingleArenaBufferAllocator::IsAllTempDeallocated() {
if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) {
MicroPrintf(
"Number of allocated temp buffers: %d. Checksum passing status: %d",
temp_buffer_count_, !temp_buffer_ptr_check_sum_);
return false;
}
return true;
}
TfLiteStatus SingleArenaBufferAllocator::ResetTempAllocations() {
// TODO(b/209453859): enable error check based on IsAllTempDeallocated after
// all AllocateTemp calls have been paired with DeallocateTemp.
if (!IsAllTempDeallocated()) {
MicroPrintf(
"All temp buffers must be freed before calling ResetTempAllocations()");
return kTfLiteError;
}
temp_ = head_;
return kTfLiteOk;
}
uint8_t* SingleArenaBufferAllocator::GetOverlayMemoryAddress() const {
return buffer_head_;
}
size_t SingleArenaBufferAllocator::GetNonPersistentUsedBytes() const {
return std::max(head_ - buffer_head_, temp_ - buffer_head_);
}
size_t SingleArenaBufferAllocator::GetPersistentUsedBytes() const {
return buffer_tail_ - tail_;
}
size_t SingleArenaBufferAllocator::GetAvailableMemory(size_t alignment) const {
uint8_t* const aligned_temp = AlignPointerUp(temp_, alignment);
uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment);
return aligned_tail - aligned_temp;
}
size_t SingleArenaBufferAllocator::GetUsedBytes() const {
return GetPersistentUsedBytes() + GetNonPersistentUsedBytes();
}
size_t SingleArenaBufferAllocator::GetBufferSize() const {
return buffer_tail_ - buffer_head_;
}
uint8_t* SingleArenaBufferAllocator::head() const { return head_; }
uint8_t* SingleArenaBufferAllocator::tail() const { return tail_; }
} // namespace tflite
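
A minimal sketch of the two-ended arena usage this file implements; sizes and alignments are illustrative:

#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

void SingleArenaExample() {
  constexpr size_t kArenaSize = 4096;
  alignas(16) static uint8_t arena[kArenaSize];
  tflite::SingleArenaBufferAllocator allocator(tflite::GetMicroErrorReporter(),
                                               arena, kArenaSize);

  // Persistent data grows down from the tail of the arena...
  uint8_t* persistent =
      allocator.AllocatePersistentBuffer(/*size=*/256, /*alignment=*/16);
  // ...while temp allocations and the single resizable buffer grow up from
  // the head. Temp allocations must be deallocated and reset before the
  // resizable buffer can change size.
  uint8_t* temp = allocator.AllocateTemp(/*size=*/128, /*alignment=*/16);
  allocator.DeallocateTemp(temp);
  allocator.ResetTempAllocations();
  uint8_t* overlay =
      allocator.AllocateResizableBuffer(/*size=*/512, /*alignment=*/16);
  (void)persistent;
  (void)overlay;
}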

View File

@@ -1,152 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"
namespace tflite {
// TODO(petewarden): This allocator never frees up or reuses any memory, even
// though we have enough information about lifetimes of the tensors to do so.
// This makes it pretty wasteful, so we should use a more intelligent method.
class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
public IPersistentBufferAllocator {
public:
// TODO(b/157615197): Cleanup constructors/destructor and use factory
// functions.
SingleArenaBufferAllocator(ErrorReporter* error_reporter,
uint8_t* buffer_head, uint8_t* buffer_tail);
SingleArenaBufferAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
size_t buffer_size);
virtual ~SingleArenaBufferAllocator();
// Creates a new SingleArenaBufferAllocator from a given buffer head and size.
static SingleArenaBufferAllocator* Create(ErrorReporter* error_reporter,
uint8_t* buffer_head,
size_t buffer_size);
// Resizes a buffer previously returned by AllocateResizableBuffer(). In the
// current implementation, it adjusts the head (lowest address, moving
// upwards) memory allocation to a given size. Calls to this method will also
// invalidate all temporary allocation values (it sets the location of temp
// space at the end of the head section). This call will fail if a chain of
// allocations through AllocateTemp() has not been cleaned up with a call to
// ResetTempAllocations().
virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
size_t alignment) override;
// Returns a buffer that is resizable via ResizeBuffer(). Only one
// resizable buffer is currently supported.
virtual uint8_t* AllocateResizableBuffer(size_t size,
size_t alignment) override;
// Frees up the memory occupied by the resizable buffer.
virtual TfLiteStatus DeallocateResizableBuffer(
uint8_t* resizable_buf) override;
// Reserves the non-persistent memory that is planned by the memory planner.
virtual TfLiteStatus ReserveNonPersistentOverlayMemory(
size_t size, size_t alignment) override;
// Allocates persistent memory starting at the tail of the arena (highest
// address and moving downwards).
virtual uint8_t* AllocatePersistentBuffer(size_t size,
size_t alignment) override;
// Allocates a temporary buffer from the head of the arena (lowest address,
// moving upwards) but does not update the actual head allocation size or
// position. The returned buffer is guaranteed to remain valid until either
// ResetTempAllocations() is called or AllocateFromHead() is called again.
// Repeated calls to this function create a chain of temp allocations. All
// calls to AllocateTemp() must end with a call to ResetTempAllocations(). If
// AllocateFromHead() is called before a call to ResetTempAllocations(), it
// will fail with an error message.
virtual uint8_t* AllocateTemp(size_t size, size_t alignment) override;
// Signals that a temporary buffer is no longer needed. This is currently for
// book-keeping purposes, and the memory region is not immediately available
// for re-use. Deallocated memory regions are only reclaimed after
// ResetTempAllocations() is called.
virtual void DeallocateTemp(uint8_t* buf) override;
// Returns true if all temporary buffers are already deallocated.
virtual bool IsAllTempDeallocated() override;
// Resets a chain of temporary allocations back to the current head of the
// arena (lowest address).
virtual TfLiteStatus ResetTempAllocations() override;
// Returns a pointer to the buffer currently assigned to the head section.
// This buffer is set by calling SetHeadSize().
uint8_t* GetOverlayMemoryAddress() const override;
// Returns the size of the head section in bytes.
size_t GetNonPersistentUsedBytes() const override;
// Returns the size of all allocations in the tail section in bytes.
size_t GetPersistentUsedBytes() const override;
// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
size_t GetAvailableMemory(size_t alignment) const override;
// Returns the number of used bytes in the allocator. This number takes into
// account any temporary allocations.
size_t GetUsedBytes() const;
TF_LITE_REMOVE_VIRTUAL_DELETE
protected:
// Returns a pointer to the current end of the head buffer.
uint8_t* head() const;
// Returns a pointer to the current end of the tail buffer.
uint8_t* tail() const;
private:
size_t GetBufferSize() const;
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
ErrorReporter* error_reporter_;
#endif
uint8_t* buffer_head_;
uint8_t* buffer_tail_;
uint8_t* head_;
uint8_t* tail_;
uint8_t* temp_;
// The combination of the checksum of outstanding temporary buffer pointers
// AND the count of outstanding temporary buffer provide a low cost mechanism
// to audit temporary buffers' allocation and deallocation.
//
// XOR Check sum for outstanding temp buffers.
// If all temp buffers are deallocated OR no temp buffers are allocated,
// temp_buffer_ptr_check_sum_ == nullptr.
intptr_t temp_buffer_ptr_check_sum_ = 0;
// Count of outstanding temp buffers.
int temp_buffer_count_ = 0;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_

View File

@@ -1,32 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
// C++ will automatically create class-specific delete operators for virtual
// objects, which by default call the global delete function. For embedded
// applications we want to avoid this, and won't be calling new/delete on these
// objects, so we need to override the default implementation with one that does
// nothing to avoid linking in ::delete().
// This macro needs to be included in all subclasses of a virtual base class in
// the private section.
#ifdef TF_LITE_STATIC_MEMORY
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
void operator delete(void* p) {}
#else
#define TF_LITE_REMOVE_VIRTUAL_DELETE
#endif
#endif // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
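
A minimal sketch of the documented pattern; MyPersistentAllocator is an illustrative subclass (the real subclasses in this commit, such as AllOpsResolver above, follow the same shape):

#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"

class MyPersistentAllocator : public tflite::IPersistentBufferAllocator {
 public:
  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;
  size_t GetPersistentUsedBytes() const override;

 private:
  // Placed in the private section so the class-specific delete operator is
  // stubbed out under TF_LITE_STATIC_MEMORY.
  TF_LITE_REMOVE_VIRTUAL_DELETE
};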

View File

@@ -1,50 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Reference implementation of the DebugLog() function that's required for a
// platform to support the TensorFlow Lite for Microcontrollers library. This is
// the only function that's absolutely required to be available on a target
// device, since it's used for communicating test results back to the host so
// that we can verify the implementation is working correctly.
// It's designed to be as easy as possible to supply an implementation though.
// On platforms that have a POSIX stack or C library, it can be written as a
// single call to `fprintf(stderr, "%s", s)` to output a string to the error
// stream of the console, but if there's no OS or C library available, there's
// almost always an equivalent way to write out a string to some serial
// interface that can be used instead. For example on Arm M-series MCUs, calling
// the `bkpt #0xAB` assembler instruction will output the string in r1 to
// whatever debug serial connection is available. If you're running mbed, you
// can do the same by creating `Serial pc(USBTX, USBRX)` and then calling
// `pc.printf("%s", s)`.
// To add an equivalent function for your own platform, create your own
// implementation file and place it in a subfolder named after the OS you're
// targeting. For example, see the Cortex M bare metal version in
// tensorflow/lite/micro/bluepill/debug_log.cc or the mbed one in
// tensorflow/lite/micro/mbed/debug_log.cc.
#include "tensorflow/lite/micro/debug_log.h"
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include <cstdio>
#endif
extern "C" void DebugLog(const char* s) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
// Reusing TF_LITE_STRIP_ERROR_STRINGS to disable DebugLog completely to get
// maximum reduction in binary size. This is because we have DebugLog calls
// via TF_LITE_CHECK that are not stubbed out by TF_LITE_REPORT_ERROR.
fprintf(stderr, "%s", s);
#endif
}
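
Porting to a new target only requires replacing this one function, as the comment above describes. A hedged sketch of a bare-metal override, where uart_write_byte() is a hypothetical board-support routine standing in for whatever serial output the target provides:

#include <cstdint>

#include "tensorflow/lite/micro/debug_log.h"

// Hypothetical board-support routine; substitute your platform's UART/SWO/ITM
// write call.
extern "C" void uart_write_byte(uint8_t byte);

extern "C" void DebugLog(const char* s) {
  while (*s != '\0') {
    uart_write_byte(static_cast<uint8_t>(*s++));
  }
}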

View File

@@ -1,31 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// This function should be implemented by each target platform, and provide a
// way for strings to be output to some text stream. For more information, see
// tensorflow/lite/micro/debug_log.cc.
void DebugLog(const char* s);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_

View File

@@ -1,107 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/fake_micro_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_arena_constants.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
// Dummy static variables to allow creation of a dummy MicroAllocator.
// All tests are guaranteed to run serially.
static constexpr int KDummyTensorArenaSize = 256;
static uint8_t dummy_tensor_arena[KDummyTensorArenaSize];
} // namespace
FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors,
SingleArenaBufferAllocator* allocator,
MicroGraph* micro_graph)
: MicroContext(
MicroAllocator::Create(dummy_tensor_arena, KDummyTensorArenaSize,
GetMicroErrorReporter()),
nullptr, micro_graph),
tensors_(tensors),
allocator_(allocator) {}
TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) {
allocated_tensor_count_++;
return &tensors_[tensor_index];
}
void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
allocated_tensor_count_--;
}
bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() {
return !allocated_tensor_count_;
}
TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) {
TfLiteEvalTensor* eval_tensor =
reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocateTemp(
sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
TFLITE_DCHECK(eval_tensor != nullptr);
// In unit tests, the TfLiteTensor pointer contains the source of truth for
// buffers and values:
eval_tensor->data = tensors_[tensor_index].data;
eval_tensor->dims = tensors_[tensor_index].dims;
eval_tensor->type = tensors_[tensor_index].type;
return eval_tensor;
}
void* FakeMicroContext::AllocatePersistentBuffer(size_t bytes) {
// FakeMicroContext uses SingleArenaBufferAllocator, which does not
// automatically apply buffer alignment the way MicroAllocator does. Applying
// the alignment here is potentially wasteful but allows the fake_micro_context
// to work correctly with optimized kernels.
return allocator_->AllocatePersistentBuffer(bytes,
MicroArenaBufferAlignment());
}
TfLiteStatus FakeMicroContext::RequestScratchBufferInArena(size_t bytes,
int* buffer_index) {
TFLITE_DCHECK(buffer_index != nullptr);
if (scratch_buffer_count_ == kNumScratchBuffers_) {
MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
kNumScratchBuffers_);
return kTfLiteError;
}
// For tests, we allocate scratch buffers from the tail and keep them around
// for the lifetime of the model. This means that the arena size in the tests
// will be larger than it would be if the scratch buffers could share memory.
scratch_buffers_[scratch_buffer_count_] =
allocator_->AllocatePersistentBuffer(bytes, MicroArenaBufferAlignment());
TFLITE_DCHECK(scratch_buffers_[scratch_buffer_count_] != nullptr);
*buffer_index = scratch_buffer_count_++;
return kTfLiteOk;
}
void* FakeMicroContext::GetScratchBuffer(int buffer_index) {
TFLITE_DCHECK(scratch_buffer_count_ <= kNumScratchBuffers_);
if (buffer_index >= scratch_buffer_count_) {
return nullptr;
}
return scratch_buffers_[buffer_index];
}
} // namespace tflite

View File

@@ -1,56 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#define TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
namespace tflite {
// A fake of MicroContext for kernel util tests.
class FakeMicroContext : public MicroContext {
public:
FakeMicroContext(TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator,
MicroGraph* micro_graph);
void* AllocatePersistentBuffer(size_t bytes) override;
TfLiteStatus RequestScratchBufferInArena(size_t bytes,
int* buffer_index) override;
void* GetScratchBuffer(int buffer_index) override;
TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) override;
void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override;
bool IsAllTempTfLiteTensorDeallocated();
TfLiteEvalTensor* GetEvalTensor(int tensor_index) override;
private:
static constexpr int kNumScratchBuffers_ = 12;
int scratch_buffer_count_ = 0;
uint8_t* scratch_buffers_[kNumScratchBuffers_];
TfLiteTensor* tensors_;
int allocated_tensor_count_ = 0;
SingleArenaBufferAllocator* allocator_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_

View File

@@ -1,84 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/flatbuffer_utils.h"
namespace tflite {
FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size)
: flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {}
int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadInt64(elem, byte_width_);
}
uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadUInt64(elem, byte_width_);
}
int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const {
return static_cast<int32_t>(ElementAsInt64(i));
}
bool FlexbufferWrapper::ElementAsBool(size_t i) const {
return static_cast<bool>(ElementAsUInt64(i));
}
double FlexbufferWrapper::ElementAsDouble(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadDouble(elem, byte_width_);
}
float FlexbufferWrapper::ElementAsFloat(size_t i) const {
return static_cast<float>(FlexbufferWrapper::ElementAsDouble(i));
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const SubGraph* subgraph) {
if (subgraph->operators() != nullptr) {
return subgraph->operators()->size();
} else {
return 0;
}
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
return NumSubgraphOperators(subgraph);
}
TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
const flatbuffers::Vector<int32_t>* flatbuffer_array) {
// On little-endian machines, TfLiteIntArray happens to have the same memory
// layout as flatbuffers::Vector<int32_t>, so we can reinterpret_cast the
// flatbuffer vector and avoid a copy and malloc.
// TODO(b/188459715): audit this usage of const_cast.
return const_cast<TfLiteIntArray*>(
reinterpret_cast<const TfLiteIntArray*>(flatbuffer_array));
}
TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
const flatbuffers::Vector<float>* flatbuffer_array) {
// On little-endian machines, TfLiteFloatArray happens to have the same memory
// layout as flatbuffers::Vector<float>, so we can reinterpret_cast the
// flatbuffer vector and avoid a copy and malloc.
// TODO(b/188459715): audit this usage of const_cast.
return const_cast<TfLiteFloatArray*>(
reinterpret_cast<const TfLiteFloatArray*>(flatbuffer_array));
}
} // namespace tflite

View File

@@ -1,65 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
// with the parameter names as map keys and the parameter values as the
// corresponding map values.
// Accessing the map values using the flexbuffers::Map class is inline heavy,
// which can cause the code size to bloat beyond what's reasonable for a micro
// application. Use this class instead, when possible.
// FlexbufferWrapper takes advantage of the following properties of
// flexbuffers::Map:
// 1. It can be viewed as a flexbuffers::Vector of the values.
// 2. The values in the vector are ordered alphabetically by their keys.
// 3. All integer and Boolean values are stored as 64-bit numbers.
// 4. All floating point values are stored as double precision numbers.
// The properties are mentioned in the flexbuffers docs, but we rely on
// a unit test to catch design changes.
class FlexbufferWrapper : public flexbuffers::Vector {
public:
// Construct with a serialized flexbuffer 'buffer' of 'size' bytes
explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
int64_t ElementAsInt64(size_t i) const;
uint64_t ElementAsUInt64(size_t i) const;
int32_t ElementAsInt32(size_t i) const;
bool ElementAsBool(size_t i) const;
double ElementAsDouble(size_t i) const;
float ElementAsFloat(size_t i) const;
};
// Returns the number of operators in a tflite subgraph.
uint32_t NumSubgraphOperators(const SubGraph* subgraph);
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);
// Converts a flatbuffer array to a TfLiteArray.
// TODO(b/188459715): These function convert a const input to a non-const via a
// const_cast. It is unclear exactly why this is required.
TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
const flatbuffers::Vector<int32_t>* flatbuffer_array);
TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
const flatbuffers::Vector<float>* flatbuffer_array);
} // namespace tflite
#endif // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
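
A hedged sketch of parsing custom-op init parameters with FlexbufferWrapper; the "padding" and "stride" parameter names and the MyCustomOpOptions struct are hypothetical, and per the ordering property above the values arrive alphabetically by key:

#include <cstdint>

#include "tensorflow/lite/micro/flatbuffer_utils.h"

// Hypothetical options for a custom op with two flexbuffer-encoded
// parameters: "padding" (bool) and "stride" (int).
struct MyCustomOpOptions {
  bool padding;
  int32_t stride;
};

MyCustomOpOptions ParseMyCustomOpOptions(const uint8_t* buffer, size_t length) {
  const tflite::FlexbufferWrapper wrapper(buffer, length);
  MyCustomOpOptions options;
  // Keys are sorted alphabetically: "padding" is element 0, "stride" is 1.
  options.padding = wrapper.ElementAsBool(0);
  options.stride = wrapper.ElementAsInt32(1);
  return options;
}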

View File

@@ -1,57 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#include <algorithm>
#include <cmath>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"
namespace tflite {
namespace ops {
namespace micro {
// Returns the floating point value for a fused activation:
inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
switch (act) {
case kTfLiteActNone:
return a;
case kTfLiteActRelu:
return TfLiteMax(0.0f, a);
case kTfLiteActReluN1To1:
return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
case kTfLiteActRelu6:
return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
case kTfLiteActTanh:
return std::tanh(a);
case kTfLiteActSignBit:
return std::signbit(a);
case kTfLiteActSigmoid:
return 1.0f / (1.0f + std::exp(-a));
}
return 0.0f; // To indicate an unsupported activation (i.e. when a new fused
// activation is added to the enum and not handled here).
}
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
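A minimal usage sketch of the helper above; only the header path implied by its include guard is assumed.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"

// Applies a fused ReLU6 to a single accumulator value, i.e.
// max(0.0f, min(accumulator, 6.0f)).
float ClampWithRelu6(float accumulator) {
  return tflite::ops::micro::ActivationValFloat(kTfLiteActRelu6, accumulator);
}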

View File

@@ -1,120 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
ReluFloat(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default: {
MicroPrintf("Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
}
void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
Relu6Float(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
case kTfLiteInt8: {
Relu6Quantized(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default: {
MicroPrintf("Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_RELU() {
return tflite::micro::RegisterOp(ReluInit, ReluPrepare, ReluEval);
}
TfLiteRegistration Register_RELU6() {
return tflite::micro::RegisterOp(Relu6Init, Relu6Prepare, Relu6Eval);
}
} // namespace tflite
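To pull these kernels into an application build, something along the following lines is typically used; the AddRelu/AddRelu6 helpers on MicroMutableOpResolver are assumed to be available as in stock TFLM rather than taken from the removed files.
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Registers the RELU and RELU6 kernels defined above with an application's
// op resolver (capacity chosen by the application).
void RegisterActivationOps(tflite::MicroMutableOpResolver<2>& resolver) {
  resolver.AddRelu();   // wraps Register_RELU()
  resolver.AddRelu6();  // wraps Register_RELU6()
}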

View File

@@ -1,63 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
extern const int kActivationsInputTensor;
extern const int kActivationsOutputTensor;
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
};
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data);
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data);
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data);
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_

View File

@@ -1,158 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kActivationsInputTensor = 0;
const int kActivationsOutputTensor = 0;
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<int8_t>(clamped);
}
}
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int8_t val = input_data[i];
const int8_t clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kActivationsOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
}
micro_context->DeallocateTempTfLiteTensor(input);
return kTfLiteOk;
}
} // namespace tflite
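As a quick illustration of the float reference paths above, a self-contained sketch; the RuntimeShape initializer-list constructor from kernels/internal/types.h is assumed.
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/kernels/activations.h"

// Clamps four values into [0, 6]; expected output {0.0f, 0.5f, 3.0f, 6.0f}.
void Relu6FloatExample() {
  const float input[4] = {-1.0f, 0.5f, 3.0f, 9.0f};
  float output[4] = {};
  const tflite::RuntimeShape shape({1, 4});
  tflite::Relu6Float(shape, input, shape, output);
}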

View File

@@ -1,165 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpDataAdd* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
if (data->requires_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpDataAdd* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Add(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
false);
}
break;
}
default:
MicroPrintf("Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
}
TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteRegistration Register_ADD() {
return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval);
}
} // namespace tflite

View File

@@ -1,106 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
const int kAddInputTensor1 = 0;
const int kAddInputTensor2 = 1;
const int kAddOutputTensor = 0;
TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output, OpDataAdd* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
    // General quantized path (int8/int16), with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
const double twice_max_input_scale =
2 * static_cast<double>(
std::max(input1->params.scale, input2->params.scale));
const double real_input1_multiplier =
static_cast<double>(input1->params.scale) / twice_max_input_scale;
const double real_input2_multiplier =
static_cast<double>(input2->params.scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
} else if (output->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}
return kTfLiteOk;
}
TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kAddInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kAddInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kAddOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
OpDataAdd* data = static_cast<OpDataAdd*>(node->user_data);
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TF_LITE_ENSURE_STATUS(
CalculateOpDataAdd(context, params, input1, input2, output, data));
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite
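To make the rescaling above concrete, a standalone sketch with example scales (0.05 and 0.10 in, 0.10 out) following the int8 path's left_shift of 20; QuantizeMultiplierSmallerThanOneExp is the same helper used above, and the numeric values are illustrative only.
#include <algorithm>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/quantization_util.h"

// Mirrors the multiplier computation in CalculateOpDataAdd for example scales.
void QuantizedAddMultipliersExample() {
  const double s1 = 0.05, s2 = 0.10, s_out = 0.10;
  const int left_shift = 20;  // int8 path; the int16 path uses 15
  const double twice_max_input_scale = 2 * std::max(s1, s2);  // 0.20
  const double real_input1 = s1 / twice_max_input_scale;      // 0.25
  const double real_input2 = s2 / twice_max_input_scale;      // 0.50
  const double real_output =
      twice_max_input_scale / ((1 << left_shift) * s_out);    // ~1.9e-6
  int32_t input1_multiplier, input2_multiplier, output_multiplier;
  int input1_shift, input2_shift, output_shift;
  tflite::QuantizeMultiplierSmallerThanOneExp(real_input1, &input1_multiplier,
                                              &input1_shift);
  tflite::QuantizeMultiplierSmallerThanOneExp(real_input2, &input2_multiplier,
                                              &input2_shift);
  tflite::QuantizeMultiplierSmallerThanOneExp(real_output, &output_multiplier,
                                              &output_shift);
}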

View File

@@ -1,101 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_resource_variable.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
constexpr int kInputVariableId = 0;
constexpr int kInputValue = 1;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0);
// This must be a TfLiteEvalTensor despite this being in Prepare, because
// CreateTensor allocates a temp tensor from the flatbuffer, which does not
// contain the correct ID generated within the VAR_HANDLE op. EvalTensors are
// all allocated during StartModelAllocation which happens before
// init/prepare, and VAR_HANDLE Prepare() references its own op_data in the
// TfLiteEvalTensor, so reading the ID here is valid.
const TfLiteEvalTensor* input_resource_id_tensor =
tflite::micro::GetEvalInput(context, node, kInputVariableId);
TFLITE_DCHECK(input_resource_id_tensor != nullptr);
TF_LITE_ENSURE(context, (input_resource_id_tensor->type == kTfLiteResource ||
input_resource_id_tensor->type == kTfLiteInt32));
TF_LITE_ENSURE_EQ(context, NumElements(input_resource_id_tensor->dims), 1);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* input_value =
micro_context->AllocateTempInputTensor(node, kInputValue);
TFLITE_DCHECK(input_value != nullptr);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
TF_LITE_ENSURE_OK(context,
resources->Allocate(input_resource_id_tensor->data.i32[0],
context, input_value));
micro_context->DeallocateTempTfLiteTensor(input_value);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input_id =
tflite::micro::GetEvalInput(context, node, kInputVariableId);
TFLITE_DCHECK(input_id != nullptr);
const TfLiteEvalTensor* input_value =
tflite::micro::GetEvalInput(context, node, kInputValue);
TFLITE_DCHECK(input_value != nullptr);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
if (resources == nullptr) {
MicroPrintf(
"ASSIGN_VARIABLE requires resource variables. Please create "
"ResourceVariables and pass it to the interpreter.");
return kTfLiteError;
}
TF_LITE_ENSURE_OK(context,
resources->Assign(input_id->data.i32[0], input_value));
return kTfLiteOk;
}
} // namespace.
TfLiteRegistration Register_ASSIGN_VARIABLE() {
return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
}
} // namespace tflite
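The Eval error above asks callers to create resource variables up front; a heavily hedged sketch of what that can look like follows. MicroResourceVariables::Create and its MicroAllocator parameter are assumptions about the TFLM API of this vintage, not taken from the removed files.
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_resource_variable.h"

// Creates the resource-variable container to hand to the MicroInterpreter
// constructor overload that accepts one (assumed API, see note above).
tflite::MicroResourceVariables* CreateResourceVariables(
    tflite::MicroAllocator* allocator) {
  constexpr int kMaxResourceVariables = 4;  // illustrative capacity
  return tflite::MicroResourceVariables::Create(allocator,
                                                kMaxResourceVariables);
}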

View File

@@ -1,91 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/broadcast_args.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace {
constexpr int kShape1Tensor = 0;
constexpr int kShape2Tensor = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus BroadcastArgsPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* shape1 =
micro_context->AllocateTempInputTensor(node, kShape1Tensor);
TfLiteTensor* shape2 =
micro_context->AllocateTempInputTensor(node, kShape2Tensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context,
shape1->type == kTfLiteInt32 || shape1->type == kTfLiteInt64);
TF_LITE_ENSURE_EQ(context, shape1->type, shape2->type);
TF_LITE_ENSURE_EQ(context, shape1->type, output->type);
  // Ensures the shapes are 1D tensors.
TF_LITE_ENSURE_EQ(context, NumDimensions(shape1), 1);
TF_LITE_ENSURE_EQ(context, NumDimensions(shape2), 1);
// Ensure the shape of the output tensor is compatible
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 1);
micro_context->DeallocateTempTfLiteTensor(shape1);
micro_context->DeallocateTempTfLiteTensor(shape2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus BroadcastArgsEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* shape1 =
micro::GetEvalInput(context, node, kShape1Tensor);
const TfLiteEvalTensor* shape2 =
micro::GetEvalInput(context, node, kShape2Tensor);
TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteInt32) {
reference_ops::BroadcastArgs(
micro::GetTensorShape(shape1), micro::GetTensorData<int32_t>(shape1),
micro::GetTensorShape(shape2), micro::GetTensorData<int32_t>(shape2),
micro::GetTensorShape(output), micro::GetTensorData<int32_t>(output));
} else {
reference_ops::BroadcastArgs(
micro::GetTensorShape(shape1), micro::GetTensorData<int64_t>(shape1),
micro::GetTensorShape(shape2), micro::GetTensorData<int64_t>(shape2),
micro::GetTensorShape(output), micro::GetTensorData<int64_t>(output));
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_BROADCAST_ARGS() {
return tflite::micro::RegisterOp(nullptr, BroadcastArgsPrepare,
BroadcastArgsEval);
}
} // namespace tflite
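For reference, the shape arithmetic performed by this kernel: broadcasting the shape vectors {2, 1, 3} and {4, 3} yields {2, 4, 3}. A small sketch mirroring the reference call used in Eval; the RuntimeShape initializer-list constructor is assumed.
#include <cstdint>

#include "tensorflow/lite/kernels/internal/reference/broadcast_args.h"
#include "tensorflow/lite/kernels/internal/types.h"

// Expected result in out: {2, 4, 3}.
void BroadcastArgsExample() {
  const int32_t shape1[3] = {2, 1, 3};
  const int32_t shape2[2] = {4, 3};
  int32_t out[3] = {};
  tflite::reference_ops::BroadcastArgs(
      tflite::RuntimeShape({3}), shape1, tflite::RuntimeShape({2}), shape2,
      tflite::RuntimeShape({3}), out);
}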

View File

@@ -1,123 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kShapeTensor = 1;
constexpr int kOutputTensor = 0;
// Support a maximum of 5 dimensions in TFLM.
constexpr int kMaxDims = 5;
TfLiteStatus ValidateOutputTensor(TfLiteContext* context, TfLiteTensor* input,
TfLiteTensor* shape, TfLiteTensor* output) {
  // Ensures the shape is a 1D tensor.
TF_LITE_ENSURE_EQ(context, NumDimensions(shape), 1);
  // Ensure output dims are not fewer than input dims.
int input_num_dims = NumDimensions(input);
int output_num_dims = NumDimensions(output);
int shape_num_dims = SizeOfDimension(shape, 0);
TF_LITE_ENSURE_MSG(context, output_num_dims == shape_num_dims,
"Output must match with the expected shape dimension.");
TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims,
"Output shape must be broadcastable from input shape.");
TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims,
"BroadcastTo only supports 1-5D tensor.");
// Check if output shape is broadcastable from input shape.
auto get_shape_data = [shape](int i) -> int32_t {
if (shape->type == kTfLiteInt32) {
return GetTensorData<int32_t>(shape)[i];
} else {
return GetTensorData<int64_t>(shape)[i];
}
};
int extending_dims = output_num_dims - input_num_dims;
for (int idx = 0; idx < input_num_dims; ++idx) {
TF_LITE_ENSURE_MSG(
context,
(SizeOfDimension(input, idx) == 1 ||
SizeOfDimension(input, idx) == get_shape_data(extending_dims + idx)),
"Output shape must be broadcastable from input shape.");
}
// Validating the shape of the output tensor.
tflite::RuntimeShape output_shape = tflite::GetTensorShape(output);
for (int idx = 0; idx < output_num_dims; ++idx) {
TF_LITE_ENSURE(context, output_shape.Dims(idx) == get_shape_data(idx));
}
return kTfLiteOk;
}
TfLiteStatus BroadcastToPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TfLiteTensor* shape =
micro_context->AllocateTempInputTensor(node, kShapeTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE_MSG(context, (NumDimensions(input) <= kMaxDims),
"BroadcastTo only supports 1-5D tensor.");
TF_LITE_ENSURE(context,
shape->type == kTfLiteInt32 || shape->type == kTfLiteInt64);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
// Does not support String type due to its variable size. This limitation is
// the same as TFLite.
TF_LITE_ENSURE(context, input->type != kTfLiteString);
TF_LITE_ENSURE_STATUS(ValidateOutputTensor(context, input, shape, output));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(shape);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus BroadcastToEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
  // The BroadcastTo op supports up to 5 dims, unlike the 8 dims supported in TFLite.
reference_ops::BroadcastTo<kMaxDims>(
micro::GetTensorShape(input), input->data.raw,
micro::GetTensorShape(output), output->data.raw, input->type);
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_BROADCAST_TO() {
return tflite::micro::RegisterOp(nullptr, BroadcastToPrepare,
BroadcastToEval);
}
} // namespace tflite
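The compatibility rule enforced by ValidateOutputTensor above, restated as a standalone helper with no TFLM APIs (illustration only): each input dimension must be 1 or equal to the corresponding right-aligned output dimension.
#include <cstdint>

// Example: {3, 1} is broadcastable to {2, 3, 4}; {3, 2} is not.
bool IsBroadcastableTo(const int32_t* in_dims, int in_rank,
                       const int32_t* out_dims, int out_rank) {
  if (in_rank > out_rank) return false;
  const int extending_dims = out_rank - in_rank;
  for (int idx = 0; idx < in_rank; ++idx) {
    const int32_t in_d = in_dims[idx];
    if (in_d != 1 && in_d != out_dims[extending_dims + idx]) return false;
  }
  return true;
}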

View File

@@ -1,88 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
struct OpData {
int init_subgraph_index;
bool has_run;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const auto* params =
reinterpret_cast<const TfLiteCallOnceParams*>(node->builtin_data);
op_data->init_subgraph_index = params->init_subgraph_index;
op_data->has_run = false;
TF_LITE_ENSURE(context, NumInputs(node) == 0);
TF_LITE_ENSURE(context, NumOutputs(node) == 0);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE(context,
op_data->init_subgraph_index < graph_info.NumSubgraphs());
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
// CALL_ONCE runs only once; every subsequent invocation is a no-op.
if (op_data->has_run) {
return kTfLiteOk;
}
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE_OK(context,
graph_info.InvokeSubgraph(op_data->init_subgraph_index));
op_data->has_run = true;
return kTfLiteOk;
}
} // namespace.
TfLiteRegistration Register_CALL_ONCE() {
return tflite::micro::RegisterOp(Init, Prepare, Eval);
}
} // namespace tflite

View File

@@ -1,114 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
template <typename FromT, typename ToT>
void copyCast(const FromT* in, ToT* out, int num_elements) {
std::transform(in, in + num_elements, out,
[](FromT a) { return static_cast<ToT>(a); });
}
template <typename FromT>
TfLiteStatus copyToTensor(TfLiteContext* context, const FromT* in,
TfLiteEvalTensor* out, int num_elements) {
switch (out->type) {
case kTfLiteInt8:
copyCast(in, out->data.int8, num_elements);
break;
case kTfLiteInt16:
copyCast(in, out->data.i16, num_elements);
break;
case kTfLiteInt32:
copyCast(in, out->data.i32, num_elements);
break;
case kTfLiteFloat32:
copyCast(in, tflite::micro::GetTensorData<float>(out), num_elements);
break;
default:
// Unsupported type.
MicroPrintf("Output type %s (%d) not supported.",
TfLiteTypeGetName(out->type), out->type);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
int num_elements = MatchingFlatSize(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output));
switch (input->type) {
case kTfLiteInt8:
return copyToTensor(context, input->data.int8, output, num_elements);
case kTfLiteInt16:
return copyToTensor(context, tflite::micro::GetTensorData<int16_t>(input),
output, num_elements);
case kTfLiteInt32:
return copyToTensor(context, tflite::micro::GetTensorData<int32_t>(input),
output, num_elements);
case kTfLiteUInt32:
return copyToTensor(context,
tflite::micro::GetTensorData<uint32_t>(input), output,
num_elements);
case kTfLiteFloat32:
return copyToTensor(context, tflite::micro::GetTensorData<float>(input),
output, num_elements);
default:
// Unsupported type.
MicroPrintf("Input type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_CAST() {
return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
}
} // namespace tflite
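The element-wise conversion at the heart of this kernel is just std::transform with a static_cast; a standard-library-only illustration:
#include <algorithm>
#include <cstdint>

// After the call, out holds {-128.0f, -1.0f, 0.0f, 127.0f}.
void CastInt8ToFloatExample() {
  const int8_t in[4] = {-128, -1, 0, 127};
  float out[4] = {};
  std::transform(in, in + 4, out,
                 [](int8_t a) { return static_cast<float>(a); });
}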

View File

@@ -1,75 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/ceil.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace ceil {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Ceil(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace ceil
TfLiteRegistration Register_CEIL() {
return tflite::micro::RegisterOp(nullptr, ceil::Prepare, ceil::Eval);
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -1,48 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
// The CircularBuffer op has one input and one output tensor.
extern const int kCircularBufferInputTensor;
extern const int kCircularBufferOutputTensor;
// Indices into the init flexbuffer's vector.
// The parameter's name is in the comment that follows.
// Elements in the vectors are ordered alphabetically by parameter name.
extern const int kCircularBufferCyclesMaxIndex; // 'cycles_max'
// TODO(b/149795762): Add this to TfLiteStatus enum.
extern const TfLiteStatus kTfLiteAbort;
// These fields control the stride period of a strided streaming model. This op
// returns kTfLiteAbort until cycles_until_run has been decremented to zero, at
// which point cycles_until_run is reset to cycles_max.
struct OpDataCircularBuffer {
int cycles_until_run;
int cycles_max;
};
TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_
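A behavioral sketch (not the removed kernel itself) of how these fields gate a strided streaming op: Eval-style code keeps returning kTfLiteAbort until the countdown reaches zero, then reloads it from cycles_max. The helper name is hypothetical.
#include "tensorflow/lite/micro/kernels/circular_buffer.h"

// Returns kTfLiteAbort on the skipped cycles and kTfLiteOk on the cycle where
// the op is allowed to run, mirroring the semantics described above.
TfLiteStatus StridePeriodStep(tflite::OpDataCircularBuffer* op_data) {
  if (--op_data->cycles_until_run != 0) {
    return tflite::kTfLiteAbort;
  }
  op_data->cycles_until_run = op_data->cycles_max;
  return kTfLiteOk;
}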

View File

@@ -1,97 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/kernels/circular_buffer.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
// The CircularBuffer op has one input and one output tensor.
const int kCircularBufferInputTensor = 0;
const int kCircularBufferOutputTensor = 0;
// Indices into the init flexbuffer's vector.
// The parameter's name is in the comment that follows.
// Elements in the vectors are ordered alphabetically by parameter name.
const int kCircularBufferCyclesMaxIndex = 0; // 'cycles_max'
// TODO(b/149795762): Add this to TfLiteStatus enum.
const TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kCircularBufferInputTensor);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
node, kCircularBufferOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataCircularBuffer* op_data =
static_cast<OpDataCircularBuffer*>(node->user_data);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]);
TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
TF_LITE_ENSURE_EQ(context, input->dims->data[2], output->dims->data[2]);
TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
// The circular buffer custom operator currently only supports int8.
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
if (op_data->cycles_max <= 0) {
// The last circular buffer layer simply accumulates outputs, and does not
// run periodically.
// TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
static int cb_prepare_count = 0;
cb_prepare_count++;
// These checks specifically work for the only two streaming models
// supported on TFLM. They use the shape of the output tensor along with the
// layer number to determine if the circular buffer period should be 1 or 2.
    // These models are outlined in the following documents:
// https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
// https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
output->dims->data[1] == 25 ||
(cb_prepare_count == 5 && output->dims->data[2] == 2 &&
output->dims->data[3] == 96)) {
op_data->cycles_max = 1;
cb_prepare_count = 0;
} else {
op_data->cycles_max = 2;
}
}
op_data->cycles_until_run = op_data->cycles_max;
node->user_data = op_data;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,22 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H
#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H
extern const int g_gen_data_size_circular_buffer_config;
extern const unsigned char g_gen_data_circular_buffer_config[];
#endif

View File

@@ -1,141 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kConvInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 3)
? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
: nullptr;
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto& params =
*(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
TFLITE_DCHECK(node->user_data != nullptr);
const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
tflite::reference_ops::Conv(
ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output),
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
case kTfLiteInt16: {
      TFLITE_DCHECK(bias != nullptr);
      switch (bias->type) {
case kTfLiteInt32: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data),
data.per_channel_output_multiplier, data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<std::int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
}
case kTfLiteInt64: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data),
data.per_channel_output_multiplier, data.per_channel_output_shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<std::int64_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
}
default:
MicroPrintf("Bias type %s (%d) not supported.",
TfLiteTypeGetName(bias->type), bias->type);
return kTfLiteError;
}
break;
}
case kTfLiteInt8: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetOptionalTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
}
default:
MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
input->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_CONV_2D() {
return tflite::micro::RegisterOp(Init, ConvPrepare, Eval);
}
} // namespace tflite

View File

@@ -1,112 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
struct OpDataConv {
TfLitePaddingValues padding;
// Cached tensor zero point values for quantized operations.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
// Per channel output multiplier and shift.
int32_t* per_channel_output_multiplier;
int32_t* per_channel_output_shift;
// The range of the fused activation layer. For example for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
};
extern const int kConvInputTensor;
extern const int kConvWeightsTensor;
extern const int kConvBiasTensor;
extern const int kConvOutputTensor;
extern const int kConvQuantizedDimension;
// Returns a ConvParams struct with all the parameters needed for a
// float computation.
ConvParams ConvParamsFloat(const TfLiteConvParams& params,
const OpDataConv& data);
// Returns a ConvParams struct with all the parameters needed for a
// quantized computation.
ConvParams ConvParamsQuantized(const TfLiteConvParams& params,
const OpDataConv& data);
TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams& params, int width,
int height, int filter_width,
int filter_height, int out_width,
int out_height, const TfLiteType data_type,
OpDataConv* data);
TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_CONV_2D();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 activations and int8 weights and always calls the reference
// implementation.
TfLiteRegistration Register_CONV_2D_INT8REF();
#else
inline TfLiteRegistration Register_CONV_2D_INT8REF() {
return Register_CONV_2D();
}
#endif
#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_CONV_2D_INT8();
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_CONV_2D_INT16();
#else
inline TfLiteRegistration Register_CONV_2D_INT8() { return Register_CONV_2D(); }
inline TfLiteRegistration Register_CONV_2D_INT16() {
return Register_CONV_2D();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
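The output_multiplier/output_shift pair in OpDataConv encodes the "real multiplier" mentioned in the struct comment above. A hedged sketch of how that pair is typically derived from the tensor scales (the scale values below are invented for illustration; QuantizeMultiplier is declared in kernels/internal/quantization_util.h):

#include <cstdint>

#include "tensorflow/lite/kernels/internal/quantization_util.h"

void ExampleRealMultiplier() {
  const double input_scale = 0.5;    // hypothetical scale
  const double filter_scale = 0.25;  // hypothetical scale
  const double output_scale = 1.0;   // hypothetical scale
  // real_multiplier maps accumulated int32 products back to the output scale.
  const double real_multiplier = input_scale * filter_scale / output_scale;
  int32_t output_multiplier = 0;
  int output_shift = 0;
  // Decomposes real_multiplier into a fixed-point multiplier plus a
  // power-of-two shift, the form stored in OpDataConv.
  tflite::QuantizeMultiplier(real_multiplier, &output_multiplier,
                             &output_shift);
}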

View File

@@ -1,197 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
const int kConvInputTensor = 0;
const int kConvWeightsTensor = 1;
const int kConvBiasTensor = 2;
const int kConvOutputTensor = 0;
// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
const int kConvQuantizedDimension = 0;
// Returns a ConvParams struct with all the parameters needed for a
// float computation.
ConvParams ConvParamsFloat(const TfLiteConvParams& params,
const OpDataConv& data) {
ConvParams op_params;
CalculateActivationRange(params.activation, &op_params.float_activation_min,
&op_params.float_activation_max);
op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params.stride_width;
op_params.stride_height = params.stride_height;
op_params.dilation_width_factor = params.dilation_width_factor;
op_params.dilation_height_factor = params.dilation_height_factor;
return op_params;
}
// Returns a ConvParams struct with all the parameters needed for a
// quantized computation.
ConvParams ConvParamsQuantized(const TfLiteConvParams& params,
const OpDataConv& data) {
ConvParams op_params;
op_params.input_offset = -data.input_zero_point;
op_params.weights_offset = -data.filter_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.output_multiplier = data.output_multiplier;
op_params.output_shift = -data.output_shift;
op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.stride_height = params.stride_height;
op_params.stride_width = params.stride_width;
op_params.dilation_height_factor = params.dilation_height_factor;
op_params.dilation_width_factor = params.dilation_width_factor;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
return op_params;
}
TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
const TfLiteConvParams& params, int width,
int height, int filter_width,
int filter_height, int out_width,
int out_height, const TfLiteType data_type,
OpDataConv* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Matching GetWindowedOutputSize in TensorFlow.
auto padding = params.padding;
data->padding = ComputePaddingHeightWidth(
params.stride_height, params.stride_width, params.dilation_height_factor,
params.dilation_width_factor, height, width, filter_height, filter_width,
padding, &out_height, &out_width);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
int output_channels = filter->dims->data[kConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(bias);
return kTfLiteOk;
}
TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
const auto& params =
*(static_cast<const TfLiteConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const int input_width = input->dims->data[2];
const int input_height = input->dims->data[1];
const int filter_width = filter->dims->data[2];
const int filter_height = filter->dims->data[1];
const int output_width = output->dims->data[2];
const int output_height = output->dims->data[1];
// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kConvQuantizedDimension];
data->per_channel_output_multiplier =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TFLITE_DCHECK(affine_quantization != nullptr);
TFLITE_DCHECK(affine_quantization->scale != nullptr);
TFLITE_DCHECK(affine_quantization->zero_point != nullptr);
TF_LITE_ENSURE(context,
affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kConvQuantizedDimension]);
}
TF_LITE_ENSURE_STATUS(CalculateOpDataConv(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite
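CalculateOpDataConv above delegates the padding math to ComputePaddingHeightWidth. As a rough stand-in for the SAME-padding arithmetic along one dimension (a simplified sketch under standard TF padding rules, not the library implementation; VALID padding simply uses zero padding):

int SamePaddingOneSide(int stride, int dilation, int in_size, int filter_size) {
  // Dilation spreads the filter taps apart.
  const int effective_filter_size = (filter_size - 1) * dilation + 1;
  // SAME padding keeps out_size == ceil(in_size / stride).
  const int out_size = (in_size + stride - 1) / stride;
  const int total_padding =
      (out_size - 1) * stride + effective_filter_size - in_size;
  return total_padding > 0 ? total_padding / 2 : 0;
}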

View File

@@ -1,113 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/test_helpers.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
namespace tflite {
namespace testing {
TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
int output_length, TfLiteConvParams* conv_params,
TfLiteRegistration registration, float* output_data);
TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
int output_length, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int8_t* output_data);
TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
int output_length, TfLiteConvParams* conv_params,
TfLiteRegistration registration, uint8_t* output_data);
TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
const float* expected_output_data,
int output_length,
TfLiteConvParams* conv_params,
TfLiteRegistration registration,
float* output_data, float tolerance = 1e-5);
TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
const int8_t* expected_output_data,
int output_length,
TfLiteConvParams* conv_params,
TfLiteRegistration registration,
int8_t* output_data, float tolerance = 1e-5);
TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
const uint8_t* expected_output_data,
int output_length,
TfLiteConvParams* conv_params,
TfLiteRegistration registration,
uint8_t* output_data, float tolerance = 1e-5);
TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data,
int* filter_dims_data, const float* filter_data,
int* bias_dims_data, const float* bias_data,
int* output_dims_data,
const float* expected_output_data,
TfLiteConvParams* conv_params,
TfLiteRegistration registration, float* output_data);
TfLiteStatus TestConvQuantizedPerLayer(
int* input_dims_data, const float* input_data, uint8_t* input_quantized,
float input_scale, int* filter_dims_data, const float* filter_data,
uint8_t* filter_quantized, float filter_scale, int* bias_dims_data,
const float* bias_data, int32_t* bias_quantized, int* output_dims_data,
const float* expected_output_data, uint8_t* expected_output_quantized,
float output_scale, TfLiteConvParams* conv_params,
TfLiteRegistration registration, uint8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
int* input_dims_data, const float* input_data, int8_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
float* bias_scales, int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int8_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
int* input_dims_data, const float* input_data, int16_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data,
std::int64_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int16_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int16_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
int* input_dims_data, const float* input_data, int16_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
float* bias_scales, int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int16_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int16_t* output_data);
} // namespace testing
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_

View File

@@ -1,80 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/kernels/conv.h"
namespace tflite {
extern const int kDepthwiseConvInputTensor;
extern const int kDepthwiseConvWeightsTensor;
extern const int kDepthwiseConvBiasTensor;
extern const int kDepthwiseConvOutputTensor;
extern const int kDepthwiseConvQuantizedDimension;
// Returns a DepthwiseParams struct with all the parameters needed for a
// float computation.
DepthwiseParams DepthwiseConvParamsFloat(
const TfLiteDepthwiseConvParams& params, const OpDataConv& data);
// Returns a DepthwiseParams struct with all the parameters needed for a
// quantized computation.
DepthwiseParams DepthwiseConvParamsQuantized(
const TfLiteDepthwiseConvParams& params, const OpDataConv& data);
TfLiteStatus CalculateOpDataDepthwiseConv(
TfLiteContext* context, TfLiteNode* node,
const TfLiteDepthwiseConvParams& params, int width, int height,
int filter_width, int filter_height, int out_width, int out_height,
const TfLiteType data_type, OpDataConv* data);
TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_DEPTHWISE_CONV_2D();
#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8();
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16();
#else
inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8() {
return Register_DEPTHWISE_CONV_2D();
}
inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16() {
return Register_DEPTHWISE_CONV_2D();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_

View File

@@ -1,202 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/depthwise_conv.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
const int kDepthwiseConvInputTensor = 0;
const int kDepthwiseConvWeightsTensor = 1;
const int kDepthwiseConvBiasTensor = 2;
const int kDepthwiseConvOutputTensor = 0;
// DepthwiseConv is quantized along dimension 3:
// https://www.tensorflow.org/lite/performance/quantization_spec
const int kDepthwiseConvQuantizedDimension = 3;
// Returns a DepthwiseParams struct with all the parameters needed for a
// float computation.
DepthwiseParams DepthwiseConvParamsFloat(
const TfLiteDepthwiseConvParams& params, const OpDataConv& data) {
DepthwiseParams op_params;
CalculateActivationRange(params.activation, &op_params.float_activation_min,
&op_params.float_activation_max);
op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
op_params.padding_values.width = data.padding.width;
op_params.padding_values.height = data.padding.height;
op_params.stride_width = params.stride_width;
op_params.stride_height = params.stride_height;
op_params.dilation_width_factor = params.dilation_width_factor;
op_params.dilation_height_factor = params.dilation_height_factor;
op_params.depth_multiplier = params.depth_multiplier;
return op_params;
}
// Returns a DepthwiseParams struct with all the parameters needed for a
// quantized computation.
DepthwiseParams DepthwiseConvParamsQuantized(
const TfLiteDepthwiseConvParams& params, const OpDataConv& data) {
DepthwiseParams op_params;
op_params.input_offset = -data.input_zero_point;
op_params.weights_offset = -data.filter_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.output_multiplier = data.output_multiplier;
op_params.output_shift = -data.output_shift;
op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
op_params.padding_values.height = data.padding.height;
op_params.padding_values.width = data.padding.width;
op_params.stride_height = params.stride_height;
op_params.stride_width = params.stride_width;
op_params.dilation_height_factor = params.dilation_height_factor;
op_params.dilation_width_factor = params.dilation_width_factor;
op_params.depth_multiplier = params.depth_multiplier;
op_params.quantized_activation_min = data.output_activation_min;
op_params.quantized_activation_max = data.output_activation_max;
return op_params;
}
TfLiteStatus CalculateOpDataDepthwiseConv(
TfLiteContext* context, TfLiteNode* node,
const TfLiteDepthwiseConvParams& params, int width, int height,
int filter_width, int filter_height, int out_width, int out_height,
const TfLiteType data_type, OpDataConv* data) {
bool has_bias = node->inputs->size == 3;
// Check number of inputs/outputs
TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
// Matching GetWindowedOutputSize in TensorFlow.
auto padding = params.padding;
data->padding = ComputePaddingHeightWidth(
params.stride_height, params.stride_width, params.dilation_height_factor,
params.dilation_width_factor, height, width, filter_height, filter_width,
padding, &out_height, &out_width);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// Note that quantized inference requires that all tensors have their
// parameters set. This is usually done during quantized training.
if (data_type != kTfLiteFloat32) {
int output_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}
data->input_zero_point = input->params.zero_point;
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
micro_context->DeallocateTempTfLiteTensor(bias);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
const auto& params =
*(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter =
micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
const int input_width = input->dims->data[2];
const int input_height = input->dims->data[1];
const int filter_width = filter->dims->data[2];
const int filter_height = filter->dims->data[1];
const int output_width = output->dims->data[2];
const int output_height = output->dims->data[1];
// Dynamically allocate per-channel quantization parameters.
const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
data->per_channel_output_multiplier =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
data->per_channel_output_shift =
static_cast<int32_t*>(context->AllocatePersistentBuffer(
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
TFLITE_DCHECK(affine_quantization != nullptr);
TFLITE_DCHECK(affine_quantization->scale != nullptr);
TFLITE_DCHECK(affine_quantization->zero_point != nullptr);
TF_LITE_ENSURE(
context, affine_quantization->scale->size == 1 ||
affine_quantization->scale->size ==
filter->dims->data[kDepthwiseConvQuantizedDimension]);
TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
affine_quantization->zero_point->size);
}
TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv(
context, node, params, input_width, input_height, filter_width,
filter_height, output_width, output_height, input->type, data));
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
return kTfLiteOk;
}
} // namespace tflite
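A trivial sketch of why kDepthwiseConvQuantizedDimension is 3 (per the TFLite quantization spec): a depthwise filter is laid out as [1, filter_height, filter_width, output_channels], so per-channel scales and the multiplier/shift arrays allocated in DepthwiseConvPrepare are indexed by the last dimension, whereas a regular conv filter [output_channels, h, w, input_channels] is quantized along dimension 0. The helper name below is illustrative only.

int NumDepthwiseOutputChannels(const TfLiteTensor* filter) {
  // Filter layout: [1, filter_height, filter_width, output_channels].
  return filter->dims->data[kDepthwiseConvQuantizedDimension];
}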

View File

@@ -1,88 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/dequantize.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
void* DequantizeInit(TfLiteContext* context, const char* buffer,
size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(DequantizeOpData));
}
TfLiteStatus DequantizeEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
DequantizeOpData* data = static_cast<DequantizeOpData*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (output->type == kTfLiteFloat32) {
switch (input->type) {
case kTfLiteInt8:
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt16:
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteUInt8:
reference_ops::Dequantize(data->quantization_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteRegistration Register_DEQUANTIZE() {
return tflite::micro::RegisterOp(DequantizeInit, DequantizePrepare,
DequantizeEval);
}
} // namespace tflite
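DequantizeEval above delegates to reference_ops::Dequantize, which applies the affine mapping real_value = scale * (quantized_value - zero_point). A tiny standalone sketch of that mapping (values invented for illustration):

float DequantizeOne(int8_t quantized, float scale, int32_t zero_point) {
  return scale * (static_cast<int32_t>(quantized) - zero_point);
}
// e.g. quantized = 5, scale = 0.1f, zero_point = -3  ->  0.1f * 8 = 0.8f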

View File

@@ -1,38 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
struct DequantizeOpData {
tflite::DequantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t output_zero_point;
};
TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_

View File

@@ -1,67 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/dequantize.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
DequantizeOpData* data = static_cast<DequantizeOpData*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
// TODO(b/140515557): Add cached dequant to improve hybrid model performance.
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
input->type == kTfLiteInt16 ||
input->type == kTfLiteUInt8);
TF_LITE_ENSURE(context, output->type == kTfLiteFloat32);
  // Note: output is constrained to kTfLiteFloat32 above, so this requantize
  // branch is effectively unreachable in the micro kernel.
  if (output->type == kTfLiteInt32) {
const double effective_output_scale =
static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
&data->output_shift);
}
data->quantization_params.zero_point = input->params.zero_point;
data->quantization_params.scale = static_cast<double>(input->params.scale);
data->output_zero_point = output->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,807 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <numeric>
#include <tuple>
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
/**
 * This version of detection_postprocess is specific to TFLite Micro. It
 * differs from the TFLite version as follows:
 *
 * 1.) Temporaries (temporary tensors) - the Micro version uses the scratch
 * buffer API instead.
 * 2.) Output dimensions - the Micro version does not support undefined output
 * dimensions, so the model must have static output dimensions.
*/
// Input tensors
constexpr int kInputTensorBoxEncodings = 0;
constexpr int kInputTensorClassPredictions = 1;
constexpr int kInputTensorAnchors = 2;
// Output tensors
constexpr int kOutputTensorDetectionBoxes = 0;
constexpr int kOutputTensorDetectionClasses = 1;
constexpr int kOutputTensorDetectionScores = 2;
constexpr int kOutputTensorNumDetections = 3;
constexpr int kNumCoordBox = 4;
constexpr int kBatchSize = 1;
constexpr int kNumDetectionsPerClass = 100;
// Object Detection model produces axis-aligned boxes in two formats:
// BoxCorner represents the lower left corner (xmin, ymin) and
// the upper right corner (xmax, ymax).
// CenterSize represents the center (xcenter, ycenter), height and width.
// BoxCornerEncoding and CenterSizeEncoding are related as follows:
// ycenter = y / y_scale * anchor.h + anchor.y;
// xcenter = x / x_scale * anchor.w + anchor.x;
// half_h = 0.5 * exp(h / h_scale) * anchor.h;
// half_w = 0.5 * exp(w / w_scale) * anchor.w;
// ymin = ycenter - half_h
// ymax = ycenter + half_h
// xmin = xcenter - half_w
// xmax = xcenter + half_w
struct BoxCornerEncoding {
float ymin;
float xmin;
float ymax;
float xmax;
};
struct CenterSizeEncoding {
float y;
float x;
float h;
float w;
};
// The static_asserts below ensure the structs are tightly packed (exactly
// four floats, no padding), so raw float buffers can be reinterpreted as
// arrays of these encodings.
static_assert(sizeof(BoxCornerEncoding) == sizeof(float) * kNumCoordBox,
"Size of BoxCornerEncoding is 4 float values");
static_assert(sizeof(CenterSizeEncoding) == sizeof(float) * kNumCoordBox,
"Size of CenterSizeEncoding is 4 float values");
struct OpData {
int max_detections;
int max_classes_per_detection; // Fast Non-Max-Suppression
int detections_per_class; // Regular Non-Max-Suppression
float non_max_suppression_score_threshold;
float intersection_over_union_threshold;
int num_classes;
bool use_regular_non_max_suppression;
CenterSizeEncoding scale_values;
// Scratch buffers indexes
int active_candidate_idx;
int decoded_boxes_idx;
int scores_idx;
int score_buffer_idx;
int keep_scores_idx;
int scores_after_regular_non_max_suppression_idx;
int sorted_values_idx;
int keep_indices_idx;
int sorted_indices_idx;
int buffer_idx;
int selected_idx;
// Cached tensor scale and zero point values for quantized operations
TfLiteQuantizationParams input_box_encodings;
TfLiteQuantizationParams input_class_predictions;
TfLiteQuantizationParams input_anchors;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
OpData* op_data = nullptr;
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
op_data = reinterpret_cast<OpData*>(
context->AllocatePersistentBuffer(context, sizeof(OpData)));
op_data->max_detections = m["max_detections"].AsInt32();
op_data->max_classes_per_detection = m["max_classes_per_detection"].AsInt32();
if (m["detections_per_class"].IsNull())
op_data->detections_per_class = kNumDetectionsPerClass;
else
op_data->detections_per_class = m["detections_per_class"].AsInt32();
if (m["use_regular_nms"].IsNull())
op_data->use_regular_non_max_suppression = false;
else
op_data->use_regular_non_max_suppression = m["use_regular_nms"].AsBool();
op_data->non_max_suppression_score_threshold =
m["nms_score_threshold"].AsFloat();
op_data->intersection_over_union_threshold = m["nms_iou_threshold"].AsFloat();
op_data->num_classes = m["num_classes"].AsInt32();
op_data->scale_values.y = m["y_scale"].AsFloat();
op_data->scale_values.x = m["x_scale"].AsFloat();
op_data->scale_values.h = m["h_scale"].AsFloat();
op_data->scale_values.w = m["w_scale"].AsFloat();
return op_data;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* op_data = static_cast<OpData*>(node->user_data);
MicroContext* micro_context = GetMicroContext(context);
// Inputs: box_encodings, scores, anchors
TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
TfLiteTensor* input_box_encodings =
micro_context->AllocateTempInputTensor(node, kInputTensorBoxEncodings);
TfLiteTensor* input_class_predictions =
micro_context->AllocateTempInputTensor(node,
kInputTensorClassPredictions);
TfLiteTensor* input_anchors =
micro_context->AllocateTempInputTensor(node, kInputTensorAnchors);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_box_encodings), 3);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_class_predictions), 3);
TF_LITE_ENSURE_EQ(context, NumDimensions(input_anchors), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);
const int num_boxes = input_box_encodings->dims->data[1];
const int num_classes = op_data->num_classes;
op_data->input_box_encodings.scale = input_box_encodings->params.scale;
op_data->input_box_encodings.zero_point =
input_box_encodings->params.zero_point;
op_data->input_class_predictions.scale =
input_class_predictions->params.scale;
op_data->input_class_predictions.zero_point =
input_class_predictions->params.zero_point;
op_data->input_anchors.scale = input_anchors->params.scale;
op_data->input_anchors.zero_point = input_anchors->params.zero_point;
// Scratch tensors
context->RequestScratchBufferInArena(context, num_boxes,
&op_data->active_candidate_idx);
context->RequestScratchBufferInArena(context,
num_boxes * kNumCoordBox * sizeof(float),
&op_data->decoded_boxes_idx);
context->RequestScratchBufferInArena(
context,
input_class_predictions->dims->data[1] *
input_class_predictions->dims->data[2] * sizeof(float),
&op_data->scores_idx);
// Additional buffers
context->RequestScratchBufferInArena(context, num_boxes * sizeof(float),
&op_data->score_buffer_idx);
context->RequestScratchBufferInArena(context, num_boxes * sizeof(float),
&op_data->keep_scores_idx);
context->RequestScratchBufferInArena(
context, op_data->max_detections * num_boxes * sizeof(float),
&op_data->scores_after_regular_non_max_suppression_idx);
context->RequestScratchBufferInArena(
context, op_data->max_detections * num_boxes * sizeof(float),
&op_data->sorted_values_idx);
context->RequestScratchBufferInArena(context, num_boxes * sizeof(int),
&op_data->keep_indices_idx);
context->RequestScratchBufferInArena(
context, op_data->max_detections * num_boxes * sizeof(int),
&op_data->sorted_indices_idx);
int buffer_size = std::max(num_classes, op_data->max_detections);
context->RequestScratchBufferInArena(
context, buffer_size * num_boxes * sizeof(int), &op_data->buffer_idx);
buffer_size = std::min(num_boxes, op_data->max_detections);
context->RequestScratchBufferInArena(
context, buffer_size * num_boxes * sizeof(int), &op_data->selected_idx);
// Outputs: detection_boxes, detection_scores, detection_classes,
// num_detections
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);
micro_context->DeallocateTempTfLiteTensor(input_box_encodings);
micro_context->DeallocateTempTfLiteTensor(input_class_predictions);
micro_context->DeallocateTempTfLiteTensor(input_anchors);
return kTfLiteOk;
}
class Dequantizer {
public:
Dequantizer(int zero_point, float scale)
: zero_point_(zero_point), scale_(scale) {}
float operator()(uint8_t x) {
return (static_cast<float>(x) - zero_point_) * scale_;
}
private:
int zero_point_;
float scale_;
};
template <class T>
T ReInterpretTensor(const TfLiteEvalTensor* tensor) {
const float* tensor_base = tflite::micro::GetTensorData<float>(tensor);
return reinterpret_cast<T>(tensor_base);
}
template <class T>
T ReInterpretTensor(TfLiteEvalTensor* tensor) {
float* tensor_base = tflite::micro::GetTensorData<float>(tensor);
return reinterpret_cast<T>(tensor_base);
}
TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node,
OpData* op_data) {
// Parse the input box encodings tensor.
const TfLiteEvalTensor* input_box_encodings =
tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
TF_LITE_ENSURE_EQ(context, input_box_encodings->dims->data[0], kBatchSize);
const int num_boxes = input_box_encodings->dims->data[1];
TF_LITE_ENSURE(context, input_box_encodings->dims->data[2] >= kNumCoordBox);
const TfLiteEvalTensor* input_anchors =
tflite::micro::GetEvalInput(context, node, kInputTensorAnchors);
// Decode the boxes to get (ymin, xmin, ymax, xmax) based on the anchors
CenterSizeEncoding box_centersize;
CenterSizeEncoding scale_values = op_data->scale_values;
CenterSizeEncoding anchor;
for (int idx = 0; idx < num_boxes; ++idx) {
switch (input_box_encodings->type) {
// Float
case kTfLiteFloat32: {
        // Only float box encodings are supported in this micro kernel.
const int box_encoding_idx = idx * input_box_encodings->dims->data[2];
const float* boxes = &(tflite::micro::GetTensorData<float>(
input_box_encodings)[box_encoding_idx]);
box_centersize = *reinterpret_cast<const CenterSizeEncoding*>(boxes);
anchor =
ReInterpretTensor<const CenterSizeEncoding*>(input_anchors)[idx];
break;
}
default:
// Unsupported type.
return kTfLiteError;
}
float ycenter = static_cast<float>(static_cast<double>(box_centersize.y) /
static_cast<double>(scale_values.y) *
static_cast<double>(anchor.h) +
static_cast<double>(anchor.y));
float xcenter = static_cast<float>(static_cast<double>(box_centersize.x) /
static_cast<double>(scale_values.x) *
static_cast<double>(anchor.w) +
static_cast<double>(anchor.x));
float half_h =
static_cast<float>(0.5 *
(std::exp(static_cast<double>(box_centersize.h) /
static_cast<double>(scale_values.h))) *
static_cast<double>(anchor.h));
float half_w =
static_cast<float>(0.5 *
(std::exp(static_cast<double>(box_centersize.w) /
static_cast<double>(scale_values.w))) *
static_cast<double>(anchor.w));
float* decoded_boxes = reinterpret_cast<float*>(
context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
auto& box = reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[idx];
box.ymin = ycenter - half_h;
box.xmin = xcenter - half_w;
box.ymax = ycenter + half_h;
box.xmax = xcenter + half_w;
}
return kTfLiteOk;
}
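A hand-worked sanity check of the decoding loop above, with illustrative numbers:

//   scale_values = {y: 10, x: 10, h: 5, w: 5}
//   anchor       = {y: 0.5, x: 0.5, h: 1.0, w: 1.0}
//   encoding     = {y: 0, x: 0, h: 0, w: 0}
//   => ycenter = xcenter = 0.5, half_h = half_w = 0.5 * exp(0) * 1.0 = 0.5
//   => decoded box = {ymin: 0.0, xmin: 0.0, ymax: 1.0, xmax: 1.0},
//      i.e. a zero encoding decodes back to the anchor box itself.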
void DecreasingPartialArgSort(const float* values, int num_values,
int num_to_sort, int* indices) {
std::iota(indices, indices + num_values, 0);
std::partial_sort(indices, indices + num_to_sort, indices + num_values,
[&values](const int i, const int j) {
return std::tie(values[i], j) > std::tie(values[j], i);
});
}
template <typename Compare>
void InsertionSort(int* start, int* end, Compare compare) {
for (int* i = start; i != end; ++i) {
std::rotate(std::upper_bound(start, i, *i, compare), i, i + 1);
}
}
template <typename Compare>
void TopDownMerge(int* values, int* scratch, const int half_num_values,
int num_values, Compare compare) {
int left = 0;
int right = half_num_values;
for (int i = 0; i < num_values; i++) {
if (left >= half_num_values ||
(right < num_values && compare(values[right], values[left]))) {
scratch[i] = values[right++];
} else {
scratch[i] = values[left++];
}
}
memcpy(values, scratch, num_values * sizeof(int));
}
template <typename Compare>
void MergeSort(int* values, int* scratch, const int num_values,
Compare compare) {
constexpr int threshold = 20;
if (num_values < threshold) {
InsertionSort(values, values + num_values, compare);
return;
}
const int half_num_values = num_values / 2;
MergeSort(values, scratch, half_num_values, compare);
MergeSort(values + half_num_values, scratch, num_values - half_num_values,
compare);
TopDownMerge(values, scratch, half_num_values, num_values, compare);
}
void DecreasingArgSort(const float* values, int num_values, int* indices,
int* scratch) {
std::iota(indices, indices + num_values, 0);
MergeSort(indices, scratch, num_values, [&values](const int i, const int j) {
return values[i] > values[j];
});
}
int SelectDetectionsAboveScoreThreshold(const float* values, int size,
const float threshold,
float* keep_values, int* keep_indices) {
int counter = 0;
for (int i = 0; i < size; i++) {
if (values[i] >= threshold) {
keep_values[counter] = values[i];
keep_indices[counter] = i;
counter++;
}
}
return counter;
}
bool ValidateBoxes(const float* decoded_boxes, const int num_boxes) {
for (int i = 0; i < num_boxes; ++i) {
// ymax>=ymin, xmax>=xmin
auto& box = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[i];
if (box.ymin >= box.ymax || box.xmin >= box.xmax) {
return false;
}
}
return true;
}
float ComputeIntersectionOverUnion(const float* decoded_boxes, const int i,
const int j) {
auto& box_i = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[i];
auto& box_j = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[j];
const float area_i = (box_i.ymax - box_i.ymin) * (box_i.xmax - box_i.xmin);
const float area_j = (box_j.ymax - box_j.ymin) * (box_j.xmax - box_j.xmin);
if (area_i <= 0 || area_j <= 0) return 0.0;
const float intersection_ymin = std::max<float>(box_i.ymin, box_j.ymin);
const float intersection_xmin = std::max<float>(box_i.xmin, box_j.xmin);
const float intersection_ymax = std::min<float>(box_i.ymax, box_j.ymax);
const float intersection_xmax = std::min<float>(box_i.xmax, box_j.xmax);
const float intersection_area =
std::max<float>(intersection_ymax - intersection_ymin, 0.0) *
std::max<float>(intersection_xmax - intersection_xmin, 0.0);
return intersection_area / (area_i + area_j - intersection_area);
}
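A quick worked example for ComputeIntersectionOverUnion above, with hand-picked boxes:

//   box_i = (ymin 0, xmin 0, ymax 2, xmax 2), box_j = (1, 1, 3, 3)
//   area_i = area_j = 4, intersection = (1, 1, 2, 2) with area 1
//   IoU = 1 / (4 + 4 - 1) = 1/7 ~= 0.143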
// NonMaxSuppressionSingleClass() prunes out box locations with high overlap
// before selecting the highest-scoring boxes (max_detections in number).
// It assumes all boxes are valid to start with and sorts them by score.
// If a lower-scoring box overlaps too much with a higher-scoring box, the
// lower-scoring box is discarded.
// Complexity is O(N^2): pairwise comparison between boxes.
TfLiteStatus NonMaxSuppressionSingleClassHelper(
TfLiteContext* context, TfLiteNode* node, OpData* op_data,
const float* scores, int* selected, int* selected_size,
int max_detections) {
const TfLiteEvalTensor* input_box_encodings =
tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
const int num_boxes = input_box_encodings->dims->data[1];
const float non_max_suppression_score_threshold =
op_data->non_max_suppression_score_threshold;
const float intersection_over_union_threshold =
op_data->intersection_over_union_threshold;
  // Maximum detections should be non-negative.
  TF_LITE_ENSURE(context, (max_detections >= 0));
  // intersection_over_union_threshold should be positive
  // and at most 1.
TF_LITE_ENSURE(context, (intersection_over_union_threshold > 0.0f) &&
(intersection_over_union_threshold <= 1.0f));
// Validate boxes
float* decoded_boxes = reinterpret_cast<float*>(
context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
TF_LITE_ENSURE(context, ValidateBoxes(decoded_boxes, num_boxes));
// threshold scores
int* keep_indices = reinterpret_cast<int*>(
context->GetScratchBuffer(context, op_data->keep_indices_idx));
float* keep_scores = reinterpret_cast<float*>(
context->GetScratchBuffer(context, op_data->keep_scores_idx));
int num_scores_kept = SelectDetectionsAboveScoreThreshold(
scores, num_boxes, non_max_suppression_score_threshold, keep_scores,
keep_indices);
int* sorted_indices = reinterpret_cast<int*>(
context->GetScratchBuffer(context, op_data->sorted_indices_idx));
// Reusing keep_indices for scratch buffer and write back its values
// after the sorting is done.
DecreasingArgSort(keep_scores, num_scores_kept, sorted_indices, keep_indices);
int counter = 0;
for (int i = 0; i < num_boxes; i++) {
if (scores[i] >= non_max_suppression_score_threshold) {
keep_indices[counter] = i;
counter++;
}
}
const int num_boxes_kept = num_scores_kept;
const int output_size = std::min(num_boxes_kept, max_detections);
*selected_size = 0;
int num_active_candidate = num_boxes_kept;
uint8_t* active_box_candidate = reinterpret_cast<uint8_t*>(
context->GetScratchBuffer(context, op_data->active_candidate_idx));
for (int row = 0; row < num_boxes_kept; row++) {
active_box_candidate[row] = 1;
}
for (int i = 0; i < num_boxes_kept; ++i) {
if (num_active_candidate == 0 || *selected_size >= output_size) break;
if (active_box_candidate[i] == 1) {
selected[(*selected_size)++] = keep_indices[sorted_indices[i]];
active_box_candidate[i] = 0;
num_active_candidate--;
} else {
continue;
}
for (int j = i + 1; j < num_boxes_kept; ++j) {
if (active_box_candidate[j] == 1) {
float intersection_over_union = ComputeIntersectionOverUnion(
decoded_boxes, keep_indices[sorted_indices[i]],
keep_indices[sorted_indices[j]]);
if (intersection_over_union > intersection_over_union_threshold) {
active_box_candidate[j] = 0;
num_active_candidate--;
}
}
}
}
return kTfLiteOk;
}
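A toy trace of the greedy suppression loop above (three kept boxes, already sorted by descending score):

//   box 1 overlaps box 0 above the IoU threshold; box 2 overlaps neither.
//   iteration 0: select box 0, suppress box 1 (IoU too high)
//   iteration 1: box 1 is inactive, skip
//   iteration 2: select box 2
//   => selected = {0, 2}, *selected_size = 2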
// This function implements the regular version of Non-Max Suppression (NMS)
// for multiple classes, where
// 1) NMS is performed separately for each class across all anchors, and
// 2) only the highest anchor scores across all classes are kept.
// 3) The worst-case runtime of regular NMS is O(K*N^2), where N is the number
//    of anchors and K the number of classes.
TfLiteStatus NonMaxSuppressionMultiClassRegularHelper(TfLiteContext* context,
TfLiteNode* node,
OpData* op_data,
const float* scores) {
const TfLiteEvalTensor* input_box_encodings =
tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
const TfLiteEvalTensor* input_class_predictions =
tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
TfLiteEvalTensor* detection_boxes =
tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes);
TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput(
context, node, kOutputTensorDetectionClasses);
TfLiteEvalTensor* detection_scores =
tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores);
TfLiteEvalTensor* num_detections =
tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections);
const int num_boxes = input_box_encodings->dims->data[1];
const int num_classes = op_data->num_classes;
const int num_detections_per_class = op_data->detections_per_class;
const int max_detections = op_data->max_detections;
const int num_classes_with_background =
input_class_predictions->dims->data[2];
// The row index offset is 1 if background class is included and 0 otherwise.
int label_offset = num_classes_with_background - num_classes;
TF_LITE_ENSURE(context, num_detections_per_class > 0);
// For each class, perform non-max suppression.
float* class_scores = reinterpret_cast<float*>(
context->GetScratchBuffer(context, op_data->score_buffer_idx));
int* box_indices_after_regular_non_max_suppression = reinterpret_cast<int*>(
context->GetScratchBuffer(context, op_data->buffer_idx));
float* scores_after_regular_non_max_suppression =
reinterpret_cast<float*>(context->GetScratchBuffer(
context, op_data->scores_after_regular_non_max_suppression_idx));
int size_of_sorted_indices = 0;
int* sorted_indices = reinterpret_cast<int*>(
context->GetScratchBuffer(context, op_data->sorted_indices_idx));
float* sorted_values = reinterpret_cast<float*>(
context->GetScratchBuffer(context, op_data->sorted_values_idx));
for (int col = 0; col < num_classes; col++) {
for (int row = 0; row < num_boxes; row++) {
// Get scores of boxes corresponding to all anchors for single class
class_scores[row] =
*(scores + row * num_classes_with_background + col + label_offset);
}
// Perform non-maximal suppression on single class
int selected_size = 0;
int* selected = reinterpret_cast<int*>(
context->GetScratchBuffer(context, op_data->selected_idx));
TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper(
context, node, op_data, class_scores, selected, &selected_size,
num_detections_per_class));
// Add selected indices from non-max suppression of boxes in this class
int output_index = size_of_sorted_indices;
for (int i = 0; i < selected_size; i++) {
int selected_index = selected[i];
box_indices_after_regular_non_max_suppression[output_index] =
(selected_index * num_classes_with_background + col + label_offset);
scores_after_regular_non_max_suppression[output_index] =
class_scores[selected_index];
output_index++;
}
// Sort the max scores among the selected indices
// Get the indices for top scores
int num_indices_to_sort = std::min(output_index, max_detections);
DecreasingPartialArgSort(scores_after_regular_non_max_suppression,
output_index, num_indices_to_sort, sorted_indices);
// Copy values to temporary vectors
for (int row = 0; row < num_indices_to_sort; row++) {
int temp = sorted_indices[row];
sorted_indices[row] = box_indices_after_regular_non_max_suppression[temp];
sorted_values[row] = scores_after_regular_non_max_suppression[temp];
}
// Copy scores and indices from temporary vectors
for (int row = 0; row < num_indices_to_sort; row++) {
box_indices_after_regular_non_max_suppression[row] = sorted_indices[row];
scores_after_regular_non_max_suppression[row] = sorted_values[row];
}
size_of_sorted_indices = num_indices_to_sort;
}
// Fill in the output tensors
for (int output_box_index = 0; output_box_index < max_detections;
output_box_index++) {
if (output_box_index < size_of_sorted_indices) {
const int anchor_index = floor(
box_indices_after_regular_non_max_suppression[output_box_index] /
num_classes_with_background);
const int class_index =
box_indices_after_regular_non_max_suppression[output_box_index] -
anchor_index * num_classes_with_background - label_offset;
const float selected_score =
scores_after_regular_non_max_suppression[output_box_index];
// detection_boxes
float* decoded_boxes = reinterpret_cast<float*>(
context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
ReInterpretTensor<BoxCornerEncoding*>(detection_boxes)[output_box_index] =
reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[anchor_index];
// detection_classes
tflite::micro::GetTensorData<float>(detection_classes)[output_box_index] =
class_index;
// detection_scores
tflite::micro::GetTensorData<float>(detection_scores)[output_box_index] =
selected_score;
} else {
ReInterpretTensor<BoxCornerEncoding*>(
detection_boxes)[output_box_index] = {0.0f, 0.0f, 0.0f, 0.0f};
// detection_classes
tflite::micro::GetTensorData<float>(detection_classes)[output_box_index] =
0.0f;
// detection_scores
tflite::micro::GetTensorData<float>(detection_scores)[output_box_index] =
0.0f;
}
}
tflite::micro::GetTensorData<float>(num_detections)[0] =
size_of_sorted_indices;
return kTfLiteOk;
}
// This function implements a fast version of Non-Maximal Suppression for
// multiple classes, where
// 1) we keep only the top-k scores for each anchor, and
// 2) during NMS, each anchor uses only its highest class score for sorting.
// As a result, the worst-case runtime of this version is O(N^2) instead of
// O(KN^2), where N is the number of anchors and K is the number of classes.
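// In the code below this corresponds to two phases: a per-anchor partial
// arg-sort of the class scores (keeping num_categories_per_anchor of them),
// followed by a single class-agnostic NMS pass over the per-anchor max scores.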
TfLiteStatus NonMaxSuppressionMultiClassFastHelper(TfLiteContext* context,
TfLiteNode* node,
OpData* op_data,
const float* scores) {
const TfLiteEvalTensor* input_box_encodings =
tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
const TfLiteEvalTensor* input_class_predictions =
tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
TfLiteEvalTensor* detection_boxes =
tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes);
TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput(
context, node, kOutputTensorDetectionClasses);
TfLiteEvalTensor* detection_scores =
tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores);
TfLiteEvalTensor* num_detections =
tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections);
const int num_boxes = input_box_encodings->dims->data[1];
const int num_classes = op_data->num_classes;
const int max_categories_per_anchor = op_data->max_classes_per_detection;
const int num_classes_with_background =
input_class_predictions->dims->data[2];
// The row index offset is 1 if background class is included and 0 otherwise.
int label_offset = num_classes_with_background - num_classes;
TF_LITE_ENSURE(context, (max_categories_per_anchor > 0));
const int num_categories_per_anchor =
std::min(max_categories_per_anchor, num_classes);
float* max_scores = reinterpret_cast<float*>(
context->GetScratchBuffer(context, op_data->score_buffer_idx));
int* sorted_class_indices = reinterpret_cast<int*>(
context->GetScratchBuffer(context, op_data->buffer_idx));
for (int row = 0; row < num_boxes; row++) {
const float* box_scores =
scores + row * num_classes_with_background + label_offset;
int* class_indices = sorted_class_indices + row * num_classes;
DecreasingPartialArgSort(box_scores, num_classes, num_categories_per_anchor,
class_indices);
max_scores[row] = box_scores[class_indices[0]];
}
// Perform non-maximal suppression on max scores
int selected_size = 0;
int* selected = reinterpret_cast<int*>(
context->GetScratchBuffer(context, op_data->selected_idx));
TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper(
context, node, op_data, max_scores, selected, &selected_size,
op_data->max_detections));
// Fill in the output tensors
int output_box_index = 0;
for (int i = 0; i < selected_size; i++) {
int selected_index = selected[i];
const float* box_scores =
scores + selected_index * num_classes_with_background + label_offset;
const int* class_indices =
sorted_class_indices + selected_index * num_classes;
for (int col = 0; col < num_categories_per_anchor; ++col) {
int box_offset = num_categories_per_anchor * output_box_index + col;
// detection_boxes
float* decoded_boxes = reinterpret_cast<float*>(
context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
ReInterpretTensor<BoxCornerEncoding*>(detection_boxes)[box_offset] =
reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[selected_index];
// detection_classes
tflite::micro::GetTensorData<float>(detection_classes)[box_offset] =
class_indices[col];
// detection_scores
tflite::micro::GetTensorData<float>(detection_scores)[box_offset] =
box_scores[class_indices[col]];
output_box_index++;
}
}
tflite::micro::GetTensorData<float>(num_detections)[0] = output_box_index;
return kTfLiteOk;
}
TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context,
TfLiteNode* node, OpData* op_data) {
// Get the input tensors
const TfLiteEvalTensor* input_box_encodings =
tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
const TfLiteEvalTensor* input_class_predictions =
tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
const int num_boxes = input_box_encodings->dims->data[1];
const int num_classes = op_data->num_classes;
TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[0],
kBatchSize);
TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[1], num_boxes);
const int num_classes_with_background =
input_class_predictions->dims->data[2];
TF_LITE_ENSURE(context, (num_classes_with_background - num_classes <= 1));
TF_LITE_ENSURE(context, (num_classes_with_background >= num_classes));
const float* scores;
switch (input_class_predictions->type) {
case kTfLiteFloat32:
scores = tflite::micro::GetTensorData<float>(input_class_predictions);
break;
default:
// Unsupported type.
return kTfLiteError;
}
if (op_data->use_regular_non_max_suppression) {
TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClassRegularHelper(
context, node, op_data, scores));
} else {
TF_LITE_ENSURE_STATUS(
NonMaxSuppressionMultiClassFastHelper(context, node, op_data, scores));
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, (kBatchSize == 1));
auto* op_data = static_cast<OpData*>(node->user_data);
// These two functions correspond to two blocks in the Object Detection model.
// In the future, we would like to split the custom op into two ops, but that
// is currently not feasible because we want to take quantized inputs while
// doing all calculations in float, and mixed quantized/float calculations are
// currently not supported in TFLite.
// This fills in the temporary decoded_boxes buffer by transforming
// input_box_encodings and input_anchors from CenterSizeEncoding to
// BoxCornerEncoding.
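// For reference (the exact parameterization is defined by DecodeCenterSizeBoxes
// earlier in this file), the decode is the usual SSD-style transform, roughly:
//   ycenter = (y / y_scale) * anchor_h + anchor_y
//   xcenter = (x / x_scale) * anchor_w + anchor_x
//   half_h  = 0.5f * expf(h / h_scale) * anchor_h
//   half_w  = 0.5f * expf(w / w_scale) * anchor_w
//   ymin/ymax = ycenter -/+ half_h,  xmin/xmax = xcenter -/+ half_w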
TF_LITE_ENSURE_STATUS(DecodeCenterSizeBoxes(context, node, op_data));
// This fills in the output tensors by choosing an effective set of decoded
// boxes via Non-Maximal Suppression, i.e. by selecting the highest-scoring
// non-overlapping boxes.
TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClass(context, node, op_data));
return kTfLiteOk;
}
} // namespace
TfLiteRegistration* Register_DETECTION_POSTPROCESS() {
static TfLiteRegistration r = tflite::micro::RegisterOp(Init, Prepare, Eval);
return &r;
}
} // namespace tflite

View File

@@ -1,25 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H
#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H
extern const int g_gen_data_size_none_regular_nms;
extern const unsigned char g_gen_data_none_regular_nms[];
extern const int g_gen_data_size_regular_nms;
extern const unsigned char g_gen_data_regular_nms[];
#endif

View File

@@ -1,11 +0,0 @@
# Info
These are the Espressif chipset-specific replacement kernels.
The kernels call either optimized routines or reference routines, depending on the optimization option selected.
By default, optimizations are selected when available.
To change this behaviour, make the appropriate `ESP-NN` menu selection after running:
```
idf.py menuconfig
```
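Each kernel in this directory follows the same dispatch pattern: when the `ESP_NN` macro is defined, the optimized `esp_nn_*` routine is called; otherwise the portable reference implementation is compiled in. Below is a minimal, self-contained sketch of that pattern (the functions shown are stand-ins, not the real ESP-NN API):
```
#include <cstdint>
#include <cstdio>

// Stand-in for the portable reference routine (the real kernels call
// tflite::reference_ops / reference_integer_ops functions here).
static void reference_add(const int8_t* a, const int8_t* b, int8_t* out, int n) {
  for (int i = 0; i < n; ++i) {
    out[i] = static_cast<int8_t>(a[i] + b[i]);
  }
}

#if ESP_NN
// Stand-in for the optimized routine (the real kernels call e.g.
// esp_nn_add_elementwise_s8 here).
static void optimized_add(const int8_t* a, const int8_t* b, int8_t* out, int n) {
  reference_add(a, b, out, n);
}
#endif

int main() {
  const int8_t a[4] = {1, 2, 3, 4};
  const int8_t b[4] = {10, 20, 30, 40};
  int8_t out[4] = {};
#if ESP_NN
  optimized_add(a, b, out, 4);  // optimized path
#else
  reference_add(a, b, out, 4);  // reference fallback
#endif
  std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}
```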

View File

@@ -1,202 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include <esp_timer.h>
#if ESP_NN
#include <esp_nn.h>
#endif
long long add_total_time = 0;
namespace tflite {
void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
const OpDataAdd* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
if (data->requires_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteAddParams* params, const OpDataAdd* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
#if ESP_NN
const int8_t *input1_data = tflite::micro::GetTensorData<int8_t>(input1);
const int8_t *input2_data = tflite::micro::GetTensorData<int8_t>(input2);
int8_t *out_data = tflite::micro::GetTensorData<int8_t>(output);
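// esp_nn_add_elementwise_s8 performs the standard quantized add: both inputs
// are scaled up by 1 << left_shift, rescaled to a common scale with their
// per-input multipliers/shifts, summed, then requantized with the output
// multiplier/shift and clamped to the activation range (mirroring the
// reference_integer_ops::Add path below).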
esp_nn_add_elementwise_s8(input1_data,
input2_data,
data->input1_offset,
data->input2_offset,
data->input1_multiplier,
data->input2_multiplier,
data->input1_shift,
data->input2_shift,
data->left_shift,
out_data,
data->output_offset,
data->output_multiplier,
data->output_shift,
data->output_activation_min,
data->output_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output))
);
#else
reference_integer_ops::Add(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
}
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output),
false);
}
break;
}
default:
MicroPrintf("Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
}
TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);
long long start_time = esp_timer_get_time();
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {
MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
output->type);
return kTfLiteError;
}
add_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteRegistration Register_ADD() {
return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval);
}
} // namespace tflite

View File

@@ -1,191 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long fc_total_time = 0;
namespace tflite {
namespace {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context,
sizeof(OpDataFullyConnected));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* data = static_cast<OpDataFullyConnected*>(node->user_data);
const auto params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
node, kFullyConnectedWeightsTensor);
TF_LITE_ENSURE(context, filter != nullptr);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
node, kFullyConnectedOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected(
context, params->activation, input->type,
input, filter, bias, output, data));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(filter);
if (bias != nullptr) {
micro_context->DeallocateTempTfLiteTensor(bias);
}
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params =
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor);
const TfLiteEvalTensor* filter =
tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor);
const TfLiteEvalTensor* bias =
tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const auto& data =
*(static_cast<const OpDataFullyConnected*>(node->user_data));
long long start_time = esp_timer_get_time();
// Checks in Prepare ensure input, output and filter types are all the same.
switch (input->type) {
case kTfLiteFloat32: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsFloat(params->activation),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<float>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<float>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
}
case kTfLiteInt8: {
const int32_t* bias_data =
nullptr != bias ? tflite::micro::GetTensorData<int32_t>(bias)
: nullptr;
#if ESP_NN
const RuntimeShape& filter_shape = tflite::micro::GetTensorShape(filter);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
const int filter_dim_count = filter_shape.DimensionsCount();
const int batches = output_shape.Dims(0);
const int output_depth = output_shape.Dims(1);
TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int8_t *filter_data = tflite::micro::GetTensorData<int8_t>(filter);
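// Process one batch (input row) per call: each esp_nn_fully_connected_s8
// invocation computes output_depth dot products of length accum_depth, then
// the data pointers advance to the next row.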
for (int b = 0; b < batches; ++b) {
esp_nn_fully_connected_s8(input_data, -data.input_zero_point,
accum_depth,
filter_data, -data.filter_zero_point,
bias_data, output_data, output_depth,
data.output_zero_point,
data.output_shift, data.output_multiplier,
data.output_activation_min,
data.output_activation_max);
input_data += accum_depth;
output_data += output_depth;
}
#else
tflite::reference_integer_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias), bias_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
#endif
break;
}
case kTfLiteUInt8: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
}
default: {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);
return kTfLiteError;
}
}
fc_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_FULLY_CONNECTED() {
return tflite::micro::RegisterOp(Init, Prepare, Eval);
}
} // namespace tflite

View File

@@ -1,124 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/mul.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long mul_total_time = 0;
namespace tflite {
#if ESP_NN
void MulEvalQuantized(TfLiteContext* context, TfLiteNode* node,
const OpDataMul* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.float_activation_max = data->output_activation_max_f32;
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
const int8_t *input1_data = tflite::micro::GetTensorData<int8_t>(input1);
const int8_t *input2_data = tflite::micro::GetTensorData<int8_t>(input2);
int8_t *out_data = tflite::micro::GetTensorData<int8_t>(output);
esp_nn_mul_elementwise_s8(input1_data, input2_data, op_params.input1_offset,
op_params.input2_offset, out_data, op_params.output_offset,
op_params.output_multiplier, op_params.output_shift,
op_params.quantized_activation_min, op_params.quantized_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output)));
}
}
#endif
TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
long long start_time = esp_timer_get_time();
switch (input1->type) {
case kTfLiteInt8:
#if ESP_NN
MulEvalQuantized(context, node, data, input1, input2, output);
#else
EvalMulQuantizedReference(context, node, data, input1, input2, output);
#endif
break;
case kTfLiteInt32:
EvalMulQuantizedReference(context, node, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalMulFloatReference(context, node, params, data, input1, input2,
output);
break;
default:
MicroPrintf("Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
mul_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteRegistration Register_MUL() {
return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval);
}
} // namespace tflite

View File

@@ -1,231 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
#if ESP_NN
#include <esp_nn.h>
#endif
#include <esp_timer.h>
long long pooling_total_time = 0;
namespace tflite {
namespace {
#if ESP_NN
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
const int stride_height = params->stride_height;
const int stride_width = params->stride_width;
const int filter_height = params->filter_height;
const int filter_width = params->filter_width;
const int activation_min = data->activation_min;
const int activation_max = data->activation_max;
const int pad_height = data->padding.height;
const int pad_width = data->padding.width;
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
TFLITE_DCHECK_LE(activation_min, activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int input_size = input_width * input_height * depth;
const int output_size = output_width * output_height * depth;
if (depth % 4 == 0) { // S3 version only supports a channel count that is a multiple of 4
for (int batch = 0; batch < batches; ++batch) {
esp_nn_avg_pool_s8(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
} else {
for (int batch = 0; batch < batches; ++batch) {
esp_nn_avg_pool_s8_ansi(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
}
}
void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
const int stride_height = params->stride_height;
const int stride_width = params->stride_width;
const int filter_height = params->filter_height;
const int filter_width = params->filter_width;
const int activation_min = data->activation_min;
const int activation_max = data->activation_max;
const int pad_height = data->padding.height;
const int pad_width = data->padding.width;
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
TFLITE_DCHECK_LE(activation_min, activation_max);
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int depth = MatchingDim(input_shape, 3, output_shape, 3);
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
const int8_t *input_data = tflite::micro::GetTensorData<int8_t>(input);
int8_t *output_data = tflite::micro::GetTensorData<int8_t>(output);
const int input_size = input_width * input_height * depth;
const int output_size = output_width * output_height * depth;
if (depth % 4 == 0) { // S3 version only supports a channel count that is a multiple of 4
for (int batch = 0; batch < batches; ++batch) {
esp_nn_max_pool_s8(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
} else {
for (int batch = 0; batch < batches; ++batch) {
esp_nn_max_pool_s8_ansi(input_data, input_width, input_height,
output_data, output_width, output_height,
stride_width, stride_height,
filter_width, filter_height,
pad_width, pad_height,
activation_min, activation_max, depth);
input_data += input_size;
output_data += output_size;
}
}
}
#endif
TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
long long start_time = esp_timer_get_time();
// Inputs and outputs share the same type, guaranteed by the converter.
switch (input->type) {
case kTfLiteFloat32:
AveragePoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteInt8:
#if ESP_NN
AverageEvalQuantized(context, node, params, data, input, output);
#else
AveragePoolingEvalQuantized(context, node, params, data, input, output);
#endif
break;
default:
TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
pooling_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataPooling* data =
static_cast<const OpDataPooling*>(node->user_data);
const TfLiteEvalTensor* input =
micro::GetEvalInput(context, node, kPoolingInputTensor);
TfLiteEvalTensor* output =
micro::GetEvalOutput(context, node, kPoolingOutputTensor);
long long start_time = esp_timer_get_time();
switch (input->type) {
case kTfLiteFloat32:
MaxPoolingEvalFloat(context, node, params, data, input, output);
break;
case kTfLiteInt8:
#if ESP_NN
MaxEvalQuantized(context, node, params, data, input, output);
#else
MaxPoolingEvalQuantized(context, node, params, data, input, output);
#endif
break;
default:
TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
pooling_total_time += esp_timer_get_time() - start_time;
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
}
} // namespace
TfLiteRegistration Register_AVERAGE_POOL_2D() {
return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval);
}
TfLiteRegistration Register_MAX_POOL_2D() {
return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval);
}
} // namespace tflite

View File

@@ -1,27 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
//
// This is a stub file for non-Ethos platforms
//
#include "tensorflow/lite/c/common.h"
namespace tflite {
TfLiteRegistration* Register_ETHOSU() { return nullptr; }
const char* GetString_ETHOSU() { return ""; }
} // namespace tflite

View File

@@ -1,28 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_
#include "tensorflow/lite/c/common.h"
namespace tflite {
TfLiteRegistration* Register_ETHOSU();
const char* GetString_ETHOSU();
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_

View File

@@ -1,50 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace floor {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Floor(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace floor
TfLiteRegistration Register_FLOOR() {
return tflite::micro::RegisterOp(nullptr, nullptr, floor::Eval);
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -1,104 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
struct OpDataFullyConnected {
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
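// Concretely, real_multiplier = input_scale * filter_scale / output_scale,
// which QuantizeMultiplier() encodes into output_multiplier and output_shift.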
int32_t output_multiplier;
int output_shift;
// The range of the fused activation layer. For example, for kNone and
// uint8_t these would be 0 and 255.
int32_t output_activation_min;
int32_t output_activation_max;
// The index of the temporary tensor where the quantized inputs are cached.
int input_quantized_index;
// Cached zero point values of tensors.
int32_t input_zero_point;
int32_t filter_zero_point;
int32_t output_zero_point;
};
extern const int kFullyConnectedInputTensor;
extern const int kFullyConnectedWeightsTensor;
extern const int kFullyConnectedBiasTensor;
extern const int kFullyConnectedOutputTensor;
// Returns a FullyConnectedParams struct with all the parameters needed for a
// float computation.
FullyConnectedParams FullyConnectedParamsFloat(
TfLiteFusedActivation activation);
// Returns a FullyConnectedParams struct with all the parameters needed for a
// quantized computation.
FullyConnectedParams FullyConnectedParamsQuantized(
const OpDataFullyConnected& op_data);
TfLiteStatus CalculateOpDataFullyConnected(
TfLiteContext* context, TfLiteFusedActivation activation,
TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output, OpDataFullyConnected* data);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_FULLY_CONNECTED();
#if defined(CMSIS_NN) || defined(HEXAGON)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8.
TfLiteRegistration Register_FULLY_CONNECTED_INT8();
#else
// Note that while this block gets used for both reference and optimized
// kernels that do not have any specialized implementations, the only goal
// here is to define a fallback implementation that allows reference kernels
// to still be used from applications that call a more specific kernel
// variant.
inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() {
return Register_FULLY_CONNECTED();
}
#endif
#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16.
TfLiteRegistration Register_FULLY_CONNECTED_INT16();
#else
// Note that while this block gets used for both reference and optimized
// kernels that do not have any specialized implementations, the only goal
// here is to define a fallback implementation that allows reference kernels
// to still be used from applications that call a more specific kernel
// variant.
inline TfLiteRegistration Register_FULLY_CONNECTED_INT16() {
return Register_FULLY_CONNECTED();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_

View File

@@ -1,83 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
const int kFullyConnectedInputTensor = 0;
const int kFullyConnectedWeightsTensor = 1;
const int kFullyConnectedBiasTensor = 2;
const int kFullyConnectedOutputTensor = 0;
FullyConnectedParams FullyConnectedParamsQuantized(
const OpDataFullyConnected& op_data) {
FullyConnectedParams op_params;
op_params.input_offset = -op_data.input_zero_point;
op_params.weights_offset = -op_data.filter_zero_point;
op_params.output_offset = op_data.output_zero_point;
op_params.output_multiplier = op_data.output_multiplier;
op_params.output_shift = op_data.output_shift;
op_params.quantized_activation_min = op_data.output_activation_min;
op_params.quantized_activation_max = op_data.output_activation_max;
return op_params;
}
FullyConnectedParams FullyConnectedParamsFloat(
TfLiteFusedActivation activation) {
FullyConnectedParams op_params;
CalculateActivationRange(activation, &op_params.float_activation_min,
&op_params.float_activation_max);
return op_params;
}
TfLiteStatus CalculateOpDataFullyConnected(
TfLiteContext* context, TfLiteFusedActivation activation,
TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
const TfLiteTensor* bias, TfLiteTensor* output,
OpDataFullyConnected* data) {
if (data_type != kTfLiteFloat32) {
double real_multiplier = 0.0;
TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
context, input, filter, bias, output, &real_multiplier));
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input_zero_point = input->params.zero_point;
// Filter weights will always be symmetrically quantized since we only
// support int8 quantization. See
// https://github.com/tensorflow/tensorflow/issues/44912 for additional
// context.
TFLITE_DCHECK(filter->params.zero_point == 0);
data->filter_zero_point = filter->params.zero_point;
data->output_zero_point = output->params.zero_point;
return CalculateActivationRangeQuantized(context, activation, output,
&data->output_activation_min,
&data->output_activation_max);
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,75 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}
TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kHardSwishInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kHardSwishOutputTensor);
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
switch (input->type) {
case kTfLiteFloat32: {
tflite::reference_ops::HardSwish<float>(
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} break;
case kTfLiteInt8: {
tflite::reference_ops::HardSwish<int8_t>(
*params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} break;
default: {
MicroPrintf("Unsupported type %s", TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_HARD_SWISH() {
return tflite::micro::RegisterOp(HardSwishInit, tflite::HardSwishPrepare,
HardSwishEval);
}
} // namespace tflite

View File

@@ -1,30 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kHardSwishInputTensor;
extern const int kHardSwishOutputTensor;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_

View File

@@ -1,86 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kHardSwishInputTensor = 0;
const int kHardSwishOutputTensor = 0;
TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kHardSwishInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kHardSwishOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
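// hard_swish(x) = x * relu6(x + 3) / 6. For int8, the op is evaluated on a
// higher-resolution intermediate scale, hence the two fixed-point multipliers
// computed below: one for the "reluish" (x + 3) term and one for the final
// output rescale.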
HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
params->input_zero_point = input->params.zero_point;
params->output_zero_point = output->params.zero_point;
const float input_scale = input->params.scale;
const float hires_input_scale = (1.0f / 128.0f) * input_scale;
const float reluish_scale = 3.0f / 32768.0f;
const float output_scale = output->params.scale;
const double output_multiplier =
static_cast<double>(hires_input_scale / output_scale);
int32_t output_multiplier_fixedpoint_int32;
QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
&params->output_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
output_multiplier_fixedpoint_int32,
&params->output_multiplier_fixedpoint_int16);
TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
const double reluish_multiplier =
static_cast<double>(hires_input_scale / reluish_scale);
int32_t reluish_multiplier_fixedpoint_int32;
QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_exponent);
DownScaleInt32ToInt16Multiplier(
reluish_multiplier_fixedpoint_int32,
&params->reluish_multiplier_fixedpoint_int16);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,121 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
struct OpData {
int then_subgraph_index;
int else_subgraph_index;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const auto* params =
reinterpret_cast<const TfLiteIfParams*>(node->builtin_data);
op_data->then_subgraph_index = params->then_subgraph_index;
op_data->else_subgraph_index = params->else_subgraph_index;
TF_LITE_ENSURE(context, node->inputs->size > 0);
// The first input is the condition.
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, cond != nullptr);
TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);
micro_context->DeallocateTempTfLiteTensor(cond);
// The first input of the node is the condition. The rest of the inputs are
// passed to the branch subgraphs. Therefore, the number of subgraph inputs
// will be the number of node inputs - 1.
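// For example, an IF node with inputs (cond, a, b) drives branch subgraphs
// that each take two inputs (a, b) and produce the same number of outputs as
// the node.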
size_t num_inputs = node->inputs->size - 1;
size_t num_outputs = node->outputs->size;
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE(context,
op_data->then_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE(context,
op_data->else_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE_EQ(context, num_inputs,
graph_info.NumSubgraphInputs(op_data->then_subgraph_index));
TF_LITE_ENSURE_EQ(
context, num_outputs,
graph_info.NumSubgraphOutputs(op_data->then_subgraph_index));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, cond != nullptr);
bool cond_value = cond->data.b[0];
micro_context->DeallocateTempTfLiteTensor(cond);
MicroGraph* graph_info = &micro_context->graph();
// Currently we copy the input / output between the subgraphs.
int active_branch_subgraph_index =
cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index;
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToSubgraphInputs(
context, node, graph_info, active_branch_subgraph_index,
/*first_tensor_idx=*/1));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(active_branch_subgraph_index));
TF_LITE_ENSURE_OK(
context, tflite::micro::CopySubgraphOutputsToOpOutputs(
context, node, graph_info, active_branch_subgraph_index));
return kTfLiteOk;
}
} // namespace.
TfLiteRegistration Register_IF() {
return tflite::micro::RegisterOp(Init, Prepare, Eval);
}
} // namespace tflite
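For orientation, a small hedged sketch of how the registration above is typically consumed: Register_IF() bundles the Init/Prepare/Eval hooks from this file into a TfLiteRegistration, which an op resolver (for example MicroMutableOpResolver, assuming its AddIf() helper exists in the surrounding framework version) hands to the interpreter. The check below only inspects that bundle:

#include "tensorflow/lite/c/common.h"

namespace tflite {
TfLiteRegistration Register_IF();  // provided by the kernel above
}  // namespace tflite

// Sketch: confirm the registration wires up the three hooks defined above.
bool IfKernelLooksRegistered() {
  const TfLiteRegistration reg = tflite::Register_IF();
  return reg.init != nullptr && reg.prepare != nullptr && reg.invoke != nullptr;
}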

View File

@@ -1,95 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/leaky_relu.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
template <typename T>
void QuantizeLeakyRelu(const LeakyReluOpData& data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
LeakyReluParams op_params = {};
op_params.input_offset = data.input_zero_point;
op_params.output_offset = data.output_zero_point;
op_params.output_multiplier_alpha = data.output_multiplier_alpha;
op_params.output_shift_alpha = data.output_shift_alpha;
op_params.output_multiplier_identity = data.output_multiplier_identity;
op_params.output_shift_identity = data.output_shift_identity;
reference_ops::QuantizeLeakyRelu(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<T>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output));
}
void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(LeakyReluOpData));
}
TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
const LeakyReluOpData& data = *static_cast<LeakyReluOpData*>(node->user_data);
switch (input->type) {
case kTfLiteFloat32: {
LeakyReluParams op_params = {};
const auto* params =
static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
op_params.alpha = params->alpha;
reference_ops::LeakyRelu(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
QuantizeLeakyRelu<int8_t>(data, input, output);
return kTfLiteOk;
} break;
case kTfLiteInt16: {
QuantizeLeakyRelu<int16_t>(data, input, output);
return kTfLiteOk;
} break;
default:
MicroPrintf("Only float32, int8 are supported by LEAKY_RELU, got %s.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteError;
}
TfLiteRegistration Register_LEAKY_RELU() {
return tflite::micro::RegisterOp(LeakyReluInit, LeakyReluPrepare,
LeakyReluEval);
}
} // namespace tflite

View File

@@ -1,43 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Input/output tensor index.
extern const int kInputTensor;
extern const int kOutputTensor;
struct LeakyReluOpData {
// quantization parameters
int32_t output_multiplier_alpha;
int32_t output_shift_alpha;
int32_t output_multiplier_identity;
int32_t output_shift_identity;
int32_t input_zero_point;
int32_t output_zero_point;
};
TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_

View File

@@ -1,78 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/leaky_relu.h"
namespace tflite {
// Input/output tensor index.
const int kInputTensor = 0;
const int kOutputTensor = 0;
TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context,
TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
LeakyReluOpData* data = static_cast<LeakyReluOpData*>(node->user_data);
const auto* params =
static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
data->input_zero_point = input->params.zero_point;
data->output_zero_point = output->params.zero_point;
int output_shift_alpha;
double alpha_multiplier = static_cast<double>(
input->params.scale * params->alpha / output->params.scale);
QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
&output_shift_alpha);
data->output_shift_alpha = static_cast<int32_t>(output_shift_alpha);
int output_shift_identity;
double identity_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
&output_shift_identity);
data->output_shift_identity = static_cast<int32_t>(output_shift_identity);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpDataLeakyRelu(context, node);
}
} // namespace tflite
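A brief hedged sketch of what the quantized LEAKY_RELU path approximates, using the parameters computed above (all scales, zero points and alpha below are illustrative; the real kernel applies the precomputed multiplier/shift pairs instead of float math):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Float reference for one int8 element: dequantize, apply LeakyRelu, requantize.
int8_t LeakyReluInt8Reference(int8_t x_q, float input_scale, int input_zp,
                              float output_scale, int output_zp, float alpha) {
  const float x = input_scale * (x_q - input_zp);  // dequantize
  const float y = x >= 0.f ? x : alpha * x;        // LeakyRelu
  const int y_q = output_zp + static_cast<int>(std::round(y / output_scale));
  return static_cast<int8_t>(std::min(127, std::max(-128, y_q)));  // clamp
}

For example, with input scale 0.05, output scale 0.1 and alpha 0.2, the identity path needs a real multiplier of 0.05 / 0.1 = 0.5 and the alpha path 0.05 * 0.2 / 0.1 = 0.1, each of which QuantizeMultiplier splits into the multiplier/shift pairs stored in LeakyReluOpData.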

View File

@@ -1,44 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/logical.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalOr);
}
TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
return LogicalImpl(context, node, LogicalAnd);
}
} // namespace
TfLiteRegistration Register_LOGICAL_OR() {
return tflite::micro::RegisterOp(nullptr, nullptr, LogicalOrEval);
}
TfLiteRegistration Register_LOGICAL_AND() {
return tflite::micro::RegisterOp(nullptr, nullptr, LogicalAndEval);
}
} // namespace tflite

View File

@@ -1,35 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Input/output tensor index.
extern const int kLogicalInputTensor1;
extern const int kLogicalInputTensor2;
extern const int kLogicalOutputTensor;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool));
bool LogicalOr(bool x, bool y);
bool LogicalAnd(bool x, bool y);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_

View File

@@ -1,63 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logical.h"
namespace tflite {
// Input/output tensor index.
const int kLogicalInputTensor1 = 0;
const int kLogicalInputTensor2 = 1;
const int kLogicalOutputTensor = 0;
TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
bool (*func)(bool, bool)) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kLogicalInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kLogicalInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kLogicalOutputTensor);
if (tflite::micro::HaveSameShapes(input1, input2)) {
reference_ops::BinaryFunction<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
} else {
reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<bool>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<bool>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<bool>(output), func);
}
return kTfLiteOk;
}
bool LogicalOr(bool x, bool y) { return x || y; }
bool LogicalAnd(bool x, bool y) { return x && y; }
} // namespace tflite
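A standalone illustration (not the reference_ops implementation) of the two paths above: when the shapes match, the predicate is applied elementwise; otherwise the inputs are broadcast against each other, e.g. a logically [2, 1] tensor OR-ed with a [1, 3] tensor yields a [2, 3] result:

#include <array>
#include <cstdio>

int main() {
  // a is logically [2, 1], b is [1, 3]; broadcasting gives a [2, 3] output.
  const std::array<bool, 2> a = {true, false};
  const std::array<bool, 3> b = {false, false, true};
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      std::printf("%d ", a[i] || b[j]);  // LogicalOr applied per broadcast pair
    }
    std::printf("\n");
  }
  return 0;
}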

View File

@@ -1,111 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic));
}
TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kLogisticInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteFloat32: {
reference_ops::Logistic(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
switch (output->type) {
case kTfLiteInt16: {
reference_integer_ops::Logistic(
data->input_multiplier, data->input_left_shift,
NumElements(input->dims),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
}
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
switch (output->type) {
case kTfLiteInt8: {
reference_integer_ops::Logistic(
data->input_zero_point, data->input_range_radius,
data->input_multiplier, data->input_left_shift,
NumElements(input->dims),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
// TODO(b/141211002): Also support other data types once we have supported
// temporary tensors in TFLM.
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_LOGISTIC() {
return tflite::micro::RegisterOp(LogisticInit, LogisticPrepare, LogisticEval);
}
} // namespace tflite

View File

@@ -1,42 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kLogisticInputTensor;
extern const int kLogisticOutputTensor;
struct OpDataLogistic {
int32_t input_zero_point;
int32_t input_range_radius;
int32_t input_multiplier;
int input_left_shift;
};
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data);
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_

View File

@@ -1,119 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
namespace tflite {
const int kLogisticInputTensor = 0;
const int kLogisticOutputTensor = 0;
TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
TfLiteNode* node,
OpDataLogistic* data) {
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kLogisticInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kLogisticOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point,
std::numeric_limits<int8_t>::min());
static constexpr int kInputIntegerBits = 4;
const double input_real_multiplier =
static_cast<double>(input->params.scale) *
static_cast<double>(1 << (31 - kInputIntegerBits));
data->input_zero_point = input->params.zero_point;
const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
data->input_range_radius =
CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
}
if (input->type == kTfLiteInt16) {
static constexpr int kInputIntegerBits = 3;
static constexpr int kOutputFractionalBits = 15;
// See comments in TanhPrepare about requiring zero_point==0
// and a power-of-two ("POT") scale.
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
int input_scale_log2_rounded;
bool param_scale_pot =
CheckedLog2(input->params.scale, &input_scale_log2_rounded);
data->input_left_shift =
(15 - kInputIntegerBits) + input_scale_log2_rounded;
param_scale_pot &= (data->input_left_shift == 0);
if (param_scale_pot) {
data->input_multiplier = 0;
} else {
// Calculate multiplier to change input scale to 1/(3*4096)
// as required by the table lookup.
// In this scaling +/-2^17 represents +/-10.7
double multiplier =
static_cast<double>(input->params.scale) * 4096.0 * 3.0;
data->input_left_shift = 0;
while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
data->input_left_shift++;
multiplier = multiplier * 2.0;
}
data->input_multiplier = static_cast<int32_t>(multiplier);
}
int output_scale_log2_rounded;
TF_LITE_ENSURE(
context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
-kOutputFractionalBits);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
return CalculateArithmeticOpDataLogistic(context, node, data);
}
} // namespace tflite
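A worked, self-contained sketch of the int8 parameter math above (illustrative input scale 0.1; the real code additionally uses TfLiteRound and CalculateInputRadius, and the int16 path has its own power-of-two constraints):

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  constexpr int kInputIntegerBits = 4;  // as in the int8 branch above
  const double input_scale = 0.1;       // illustrative
  const double input_real_multiplier =
      input_scale * static_cast<double>(1 << (31 - kInputIntegerBits));
  int input_left_shift = 0;
  const double q = std::frexp(input_real_multiplier, &input_left_shift);
  const int32_t input_multiplier =
      static_cast<int32_t>(std::round(q * (1ll << 31)));
  // For scale 0.1 this yields q = 0.8, shift = 24, multiplier ~= 1717986918.
  std::printf("multiplier=%ld shift=%d\n",
              static_cast<long>(input_multiplier), input_left_shift);
  return 0;
}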

View File

@@ -1,250 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_
#include <cstdint>
#include <memory>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
// Parameters for integer LSTM.
// Consider splitting this into two sets of integer parameters if more fields are added.
struct IntegerLstmParameter {
int32_t effective_input_to_input_scale_a;
int32_t effective_input_to_input_scale_b;
int32_t effective_recurrent_to_input_scale_a;
int32_t effective_recurrent_to_input_scale_b;
int32_t effective_cell_to_input_scale_a;
int32_t effective_cell_to_input_scale_b;
int32_t effective_input_to_forget_scale_a;
int32_t effective_input_to_forget_scale_b;
int32_t effective_recurrent_to_forget_scale_a;
int32_t effective_recurrent_to_forget_scale_b;
int32_t effective_cell_to_forget_scale_a;
int32_t effective_cell_to_forget_scale_b;
int32_t effective_input_to_cell_scale_a;
int32_t effective_input_to_cell_scale_b;
int32_t effective_recurrent_to_cell_scale_a;
int32_t effective_recurrent_to_cell_scale_b;
int32_t effective_input_to_output_scale_a;
int32_t effective_input_to_output_scale_b;
int32_t effective_recurrent_to_output_scale_a;
int32_t effective_recurrent_to_output_scale_b;
int32_t effective_cell_to_output_scale_a;
int32_t effective_cell_to_output_scale_b;
int32_t effective_proj_scale_a;
int32_t effective_proj_scale_b;
int32_t effective_hidden_scale_a;
int32_t effective_hidden_scale_b;
int32_t layer_norm_input_scale_a;
int32_t layer_norm_input_scale_b;
int32_t layer_norm_forget_scale_a;
int32_t layer_norm_forget_scale_b;
int32_t layer_norm_cell_scale_a;
int32_t layer_norm_cell_scale_b;
int32_t layer_norm_output_scale_a;
int32_t layer_norm_output_scale_b;
// Quantized clip value for cell and projection. Zero value means no clipping.
int16_t quantized_cell_clip;
int8_t quantized_proj_clip;
int32_t hidden_zp;
int32_t cell_scale;
int32_t input_variance_guard;
int32_t forget_variance_guard;
int32_t cell_variance_guard;
int32_t output_variance_guard;
// Pre-calculate bias + zero_point * weight.
int32_t* input_to_forget_effective_bias;
int32_t* recurrent_to_forget_effective_bias;
int32_t* input_to_cell_effective_bias;
int32_t* recurrent_to_cell_effective_bias;
int32_t* input_to_output_effective_bias;
int32_t* recurrent_to_output_effective_bias;
int32_t* input_to_input_effective_bias;
int32_t* recurrent_to_input_effective_bias;
int32_t* projection_effective_bias;
// Scale and zero point for intermediate tensors.
// Used only in the 8x8_8 case.
int32_t intermediate_scale_a[8];
int32_t intermediate_scale_b[8];
int32_t intermediate_zp[12];
};
// Scales for hybrid op with integer inputs and float weights
struct HybridLstmScales {
float input_to_input_weights_scale;
float input_to_forget_weights_scale;
float input_to_cell_weights_scale;
float input_to_output_weights_scale;
float aux_input_to_input_weights_scale;
float aux_input_to_forget_weights_scale;
float aux_input_to_cell_weights_scale;
float aux_input_to_output_weights_scale;
float recurrent_to_input_weights_scale;
float recurrent_to_forget_weights_scale;
float recurrent_to_cell_weights_scale;
float recurrent_to_output_weights_scale;
float cell_to_input_weights_scale;
float cell_to_forget_weights_scale;
float cell_to_output_weights_scale;
float projection_weights_scale;
};
TfLiteStatus EvalFloatLstm(
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* input_to_input_weights,
const TfLiteEvalTensor* input_to_forget_weights,
const TfLiteEvalTensor* input_to_cell_weights,
const TfLiteEvalTensor* input_to_output_weights,
const TfLiteEvalTensor* recurrent_to_input_weights,
const TfLiteEvalTensor* recurrent_to_forget_weights,
const TfLiteEvalTensor* recurrent_to_cell_weights,
const TfLiteEvalTensor* recurrent_to_output_weights,
const TfLiteEvalTensor* cell_to_input_weights,
const TfLiteEvalTensor* cell_to_forget_weights,
const TfLiteEvalTensor* cell_to_output_weights,
const TfLiteEvalTensor* input_layer_norm_coefficients,
const TfLiteEvalTensor* forget_layer_norm_coefficients,
const TfLiteEvalTensor* cell_layer_norm_coefficients,
const TfLiteEvalTensor* output_layer_norm_coefficients,
const TfLiteEvalTensor* aux_input,
const TfLiteEvalTensor* aux_input_to_input_weights,
const TfLiteEvalTensor* aux_input_to_forget_weights,
const TfLiteEvalTensor* aux_input_to_cell_weights,
const TfLiteEvalTensor* aux_input_to_output_weights,
const TfLiteEvalTensor* input_gate_bias,
const TfLiteEvalTensor* forget_gate_bias,
const TfLiteEvalTensor* cell_gate_bias,
const TfLiteEvalTensor* output_gate_bias,
const TfLiteEvalTensor* projection_weights,
const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
bool forward_sequence, bool time_major, int output_offset,
float* scratch_buffer, TfLiteEvalTensor* output_state,
TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output);
TfLiteStatus EvalHybridLstm(
const HybridLstmScales* hybrid_lstm_scales, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* input_to_input_weights,
const TfLiteEvalTensor* input_to_input_weights_ledger,
const TfLiteEvalTensor* input_to_forget_weights,
const TfLiteEvalTensor* input_to_forget_weights_ledger,
const TfLiteEvalTensor* input_to_cell_weights,
const TfLiteEvalTensor* input_to_cell_weights_ledger,
const TfLiteEvalTensor* input_to_output_weights,
const TfLiteEvalTensor* input_to_output_weights_ledger,
const TfLiteEvalTensor* recurrent_to_input_weights,
const TfLiteEvalTensor* recurrent_to_input_weights_ledger,
const TfLiteEvalTensor* recurrent_to_forget_weights,
const TfLiteEvalTensor* recurrent_to_forget_weights_ledger,
const TfLiteEvalTensor* recurrent_to_cell_weights,
const TfLiteEvalTensor* recurrent_to_cell_weights_ledger,
const TfLiteEvalTensor* recurrent_to_output_weights,
const TfLiteEvalTensor* recurrent_to_output_weights_ledger,
const TfLiteEvalTensor* cell_to_input_weights,
const TfLiteEvalTensor* cell_to_forget_weights,
const TfLiteEvalTensor* cell_to_output_weights,
const TfLiteEvalTensor* input_layer_norm_coefficients,
const TfLiteEvalTensor* forget_layer_norm_coefficients,
const TfLiteEvalTensor* cell_layer_norm_coefficients,
const TfLiteEvalTensor* output_layer_norm_coefficients,
const TfLiteEvalTensor* aux_input,
const TfLiteEvalTensor* aux_input_to_input_weights,
const TfLiteEvalTensor* aux_input_to_forget_weights,
const TfLiteEvalTensor* aux_input_to_cell_weights,
const TfLiteEvalTensor* aux_input_to_output_weights,
const TfLiteEvalTensor* input_gate_bias,
const TfLiteEvalTensor* forget_gate_bias,
const TfLiteEvalTensor* cell_gate_bias,
const TfLiteEvalTensor* output_gate_bias,
const TfLiteEvalTensor* projection_weights,
const TfLiteEvalTensor* projection_weights_ledger,
const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
bool forward_sequence, bool time_major, int output_offset,
float* scratch_buffer, float* input_sf, float* aux_input_sf,
float* output_state_sf, float* prod_scaling_factors,
float* recovered_cell_weights, int8_t* input_quantized,
int8_t* aux_input_quantized, int8_t* output_state_quantized,
int8_t* cell_state_quantized, float* scales, TfLiteEvalTensor* output_state,
TfLiteEvalTensor* cell_state, int32_t* output_scratch_buffer,
TfLiteEvalTensor* output, int32_t* input_zp, int32_t* aux_input_zp,
int32_t* output_state_zp, int32_t* row_sums, int row_sums_size,
bool* compute_row_sums);
TfLiteStatus EvalInteger8x8_16Lstm(
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* input_to_input_weights,
const TfLiteEvalTensor* input_to_forget_weights,
const TfLiteEvalTensor* input_to_cell_weights,
const TfLiteEvalTensor* input_to_output_weights,
const TfLiteEvalTensor* recurrent_to_input_weights,
const TfLiteEvalTensor* recurrent_to_forget_weights,
const TfLiteEvalTensor* recurrent_to_cell_weights,
const TfLiteEvalTensor* recurrent_to_output_weights,
const TfLiteEvalTensor* cell_to_input_weights,
const TfLiteEvalTensor* cell_to_forget_weights,
const TfLiteEvalTensor* cell_to_output_weights,
const TfLiteEvalTensor* input_layer_norm_coefficients,
const TfLiteEvalTensor* forget_layer_norm_coefficients,
const TfLiteEvalTensor* cell_layer_norm_coefficients,
const TfLiteEvalTensor* output_layer_norm_coefficients,
const TfLiteEvalTensor* input_gate_bias,
const TfLiteEvalTensor* forget_gate_bias,
const TfLiteEvalTensor* cell_gate_bias,
const TfLiteEvalTensor* output_gate_bias,
const TfLiteEvalTensor* projection_weights,
const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
bool forward_sequence, bool time_major,
const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp,
TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state,
TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1,
int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5);
TfLiteStatus EvalInteger8x8_8Lstm(
const TfLiteEvalTensor* input,
const TfLiteEvalTensor* input_to_input_weights,
const TfLiteEvalTensor* input_to_forget_weights,
const TfLiteEvalTensor* input_to_cell_weights,
const TfLiteEvalTensor* input_to_output_weights,
const TfLiteEvalTensor* recurrent_to_input_weights,
const TfLiteEvalTensor* recurrent_to_forget_weights,
const TfLiteEvalTensor* recurrent_to_cell_weights,
const TfLiteEvalTensor* recurrent_to_output_weights,
const TfLiteEvalTensor* cell_to_input_weights,
const TfLiteEvalTensor* cell_to_forget_weights,
const TfLiteEvalTensor* cell_to_output_weights,
const TfLiteEvalTensor* input_layer_norm_coefficients,
const TfLiteEvalTensor* forget_layer_norm_coefficients,
const TfLiteEvalTensor* cell_layer_norm_coefficients,
const TfLiteEvalTensor* output_layer_norm_coefficients,
const TfLiteEvalTensor* input_gate_bias,
const TfLiteEvalTensor* forget_gate_bias,
const TfLiteEvalTensor* cell_gate_bias,
const TfLiteEvalTensor* output_gate_bias,
const TfLiteEvalTensor* projection_weights,
const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state,
TfLiteEvalTensor* output, const IntegerLstmParameter* integer_lstm_param,
int8_t* scratch0, int8_t* scratch1, int16_t* scratch2, int16_t* scratch3,
int16_t* scratch4, int16_t* scratch5, int16_t* scratch6, int16_t* scratch7);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_

View File

@@ -1,67 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
namespace tflite {
// Input Tensors of size {n_batch, n_input}
constexpr int kLstmInputTensor = 0;
// Input weight tensors of size: {n_cell, n_input}
constexpr int kLstmInputToInputWeightsTensor = 1; // Optional
constexpr int kLstmInputToForgetWeightsTensor = 2;
constexpr int kLstmInputToCellWeightsTensor = 3;
constexpr int kLstmInputToOutputWeightsTensor = 4;
// Recurrent weight tensors of size {n_cell, n_output}
constexpr int kLstmRecurrentToInputWeightsTensor = 5; // Optional
constexpr int kLstmRecurrentToForgetWeightsTensor = 6;
constexpr int kLstmRecurrentToCellWeightsTensor = 7;
constexpr int kLstmRecurrentToOutputWeightsTensor = 8;
// Peephole weights tensors of size {n_cell}, representing a diagonal matrix.
constexpr int kLstmCellToInputWeightsTensor = 9; // Optional
constexpr int kLstmCellToForgetWeightsTensor = 10; // Optional
constexpr int kLstmCellToOutputWeightsTensor = 11; // Optional
// Gates bias tensors of size {n_cell}
constexpr int kLstmInputGateBiasTensor = 12; // Optional
constexpr int kLstmForgetGateBiasTensor = 13;
constexpr int kLstmCellGateBiasTensor = 14;
constexpr int kLstmOutputGateBiasTensor = 15;
// Projection weight tensor of size {n_output, n_cell}
constexpr int kLstmProjectionWeightsTensor = 16; // Optional
// Projection bias tensor of size {n_output}
constexpr int kLstmProjectionBiasTensor = 17; // Optional
// These state tensors are defined as variable tensors, and will be modified by
// this op.
constexpr int kLstmOutputStateTensor = 18;
constexpr int kLstmCellStateTensor = 19;
// Layer norm coefficient tensors of size {n_cell}, representing a diagonal
// matrix.
constexpr int kLstmInputLayerNormCoefficientsTensor = 20; // Optional
constexpr int kLstmForgetLayerNormCoefficientsTensor = 21; // Optional
constexpr int kLstmCellLayerNormCoefficientsTensor = 22; // Optional
constexpr int kLstmOutputLayerNormCoefficientsTensor = 23; // Optional
// Output tensors.
constexpr int kLstmOutputTensor = 0;
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
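As a hedged usage sketch (illustrative, not taken from the deleted LSTM kernels): the constants above are the canonical tensor indices an LSTM kernel uses when fetching its operands, e.g. via the kernel_util helpers that appear elsewhere in this library:

#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/lstm_shared.h"

namespace tflite {
// Sketch: look up the primary LSTM input by its shared index.
inline const TfLiteEvalTensor* GetLstmInput(TfLiteContext* context,
                                            TfLiteNode* node) {
  return tflite::micro::GetEvalInput(context, node, kLstmInputTensor);
}
}  // namespace tflite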

View File

@@ -1,874 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This file and the associated .cc file are branched from
// tensorflow/lite/kernels/internal/reference/portable_tensor_utils*
// TFLM needs to create its own because the original files are coupled with
// the tensor_utils module, which we cannot reuse due to its use of the
// Eigen library.
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_
#include <algorithm>
#include <cmath>
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif
namespace tflite {
// Not all backends support CpuBackendContext usage, so forward declare to avoid
// pulling in its implementation.
// TODO(b/230666277): consider removing this since micro does not utilize it
class CpuBackendContext;
namespace micro_tensor_utils {
template <typename T>
inline bool PortableIsZeroVector(const T* vector, int v_size) {
for (int i = 0; i < v_size; ++i) {
if (vector[i] != 0) {
return false;
}
}
return true;
}
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min_value,
float* max_value, float* scaling_factor);
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor);
void PortableAsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values,
float* scaling_factor, int32_t* offset);
// Multiply a matrix by a batch vector, and store results in a batch-size
// vector.
void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
int m_rows, int m_cols,
const float* vector,
int n_batch, float* result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result, const float* per_channel_scale,
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
bool* compute_row_sums, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vector, const float* scaling_factors,
int n_batch, int32_t* scratch, float* __restrict__ result,
CpuBackendContext* context);
void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
int n_batch, const int32_t input_offset, const int32_t output_multiplier,
const int32_t output_shift, const int32_t output_offset,
const int32_t output_activation_min, const int32_t output_activation_max,
int8_t* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
const int m_cols, const int8_t* __restrict__ vectors,
const float* scaling_factors, int n_batch, float* __restrict__ result);
// Dot product of two vectors.
float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
int v_size);
void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
const int16_t* vector2,
int v_size, int n_batch,
int32_t* result);
void PortableVectorBatchVectorCwiseProductAccumulate(
const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
int32_t multiplier, int shift, int16_t* result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int16_t* output, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int8_t* output, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiply(const int8_t* input,
int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input,
int32_t n_cell, int8_t* gate_output,
int8_t gate_output_zp);
void PortableMatrixBatchVectorMultiply(
const int16_t* hidden, const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
int32_t n_output, int32_t output_zp, int8_t* proj_output);
void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
int32_t scalar, int32_t n_row,
int32_t n_col, int32_t* output);
void PortableApplyLayerNorm(const int16_t* input,
const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output);
void PortableApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b,
const int32_t* bias, int n_batch, int n_input,
int16_t* output);
void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output);
void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output);
void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
int32_t n_batch, int32_t n_input, int16_t* output);
void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int32_t integer_bits,
int16_t* output);
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int shift, int16_t* output);
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output);
void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int16_t* output);
template <typename T>
inline void PortableCwiseClipping(T* vector, const int v_size,
const T& clipping_value) {
for (int i = 0; i < v_size; i++) {
vector[i] = std::max(std::min(clipping_value, vector[i]),
static_cast<T>(-clipping_value));
}
}
// Batch vector initialization with another vector.
void PortableVectorBatchVectorAssign(const float* vector, int v_size,
int n_batch, float* batch_vector);
// Compute "1.0f - elements of vector" (used in CIFG).
void PortableSub1Vector(const float* vector, int v_size, float* result);
void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result);
// Multiply all elements of vector with a scalar.
void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
float* result);
// Reduce-sum on a vector:
// input_vector: pointer to input vector.
// output_vector: pointer to output vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
template <typename INPUT, typename OUTPUT>
inline void PortableReductionSumVector(const INPUT* input_vector,
OUTPUT* output_vector, int output_size,
int reduction_size) {
for (int o = 0; o < output_size; o++) {
OUTPUT result = 0;
for (int r = 0; r < reduction_size; r++) {
result += input_vector[r];
}
output_vector[o] = result;
input_vector += reduction_size;
}
}
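// Worked example (added for illustration, not part of the original header): with
// input_vector = {1, 2, 3, 4, 5, 6}, output_size = 2 and reduction_size = 3, the
// loop above produces output_vector = {1 + 2 + 3, 4 + 5 + 6} = {6, 15}.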
// Layer norm for each batch.
void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
float* __restrict__ output_vector,
int v_size, int n_batch);
// Saturate Add.
void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b,
int32_t n_batch, int32_t n_cell,
int16_t* output);
// Add the same vector to each batch of the batch vector.
template <typename T>
inline void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
T* batch_vector) {
for (int b = 0; b < n_batch; b++) {
for (int i = 0; i < v_size; ++i) {
batch_vector[i] += vector[i];
}
batch_vector += v_size;
}
}
// Cwise product of two vectors.
template <typename T>
inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2,
int v_size, T* result) {
for (int v = 0; v < v_size; v++) {
*result++ = *vector1++ * *vector2++;
}
}
// Cwise product of a vector and a batch-vector.
template <typename T>
inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size,
const T* batch_vector, int n_batch,
T* result) {
for (int b = 0; b < n_batch; b++) {
VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
// Update the pointers.
result += v_size;
batch_vector += v_size;
}
}
// Reduce-sum on a float input vector:
// input_vector: float pointer to input vector.
// output_vector: float pointer to output vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
inline void ReductionSumVector(const float* input_vector, float* output_vector,
int output_size, int reduction_size) {
PortableReductionSumVector(input_vector, output_vector, output_size,
reduction_size);
}
// Same as above but input/output is 32 bit integer.
inline void ReductionSumVector(const int32_t* input_vector,
int32_t* output_vector, int output_size,
int reduction_size) {
PortableReductionSumVector(input_vector, output_vector, output_size,
reduction_size);
}
// Same as above but input is 8 bit integer.
inline void ReductionSumVector(const int8_t* input_vector,
int32_t* output_vector, int output_size,
int reduction_size) {
PortableReductionSumVector(input_vector, output_vector, output_size,
reduction_size);
}
// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
// assumption here is that result array is initialized to valid values.
template <typename T>
inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
const T* __restrict__ vector2,
int v_size,
T* __restrict__ result) {
for (int v = 0; v < v_size; v++) {
*result++ += *vector1++ * *vector2++;
}
}
// Batch vector initialization with another vector.
template <typename T>
inline void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
T* batch_vector) {
for (int b = 0; b < n_batch; b++) {
std::copy_n(vector, v_size, batch_vector + b * v_size);
}
}
inline void SymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min,
float* max, float* scaling_factor) {
PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
scaling_factor);
}
inline void SymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor) {
PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
max_value, scaling_factor);
}
inline void AsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values,
float* scaling_factor, int32_t* offset) {
PortableAsymmetricQuantizeFloats(values, size, quantized_values,
scaling_factor, offset);
}
// Helper function to quantize floats.
// float_data_ptr input float vectors
// n_batch number of input vectors
// n_data size of a single input vector
// quantized_data_ptr (out) vector with quantized data
// scaling_factors (out) scaling factors (one per vector)
// zero_points (out) zero points (one per vector)
// do_asymmetric controls if the quantization should be asymmetric.
inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
int n_data, int8_t* quantized_data_ptr,
float* scaling_factors, int32_t* zero_points,
bool do_asymmetric) {
for (int b = 0; b < n_batch; ++b) {
const int offset = b * n_data;
if (do_asymmetric) {
AsymmetricQuantizeFloats(float_data_ptr + offset, n_data,
quantized_data_ptr + offset, &scaling_factors[b],
&zero_points[b]);
} else {
float unused_min, unused_max;
SymmetricQuantizeFloats(float_data_ptr + offset, n_data,
quantized_data_ptr + offset, &unused_min,
&unused_max, &scaling_factors[b]);
}
}
}
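// Worked example (illustrative): for one batch holding {-2.0f, 0.5f, 1.0f} with
// do_asymmetric == false, the symmetric path finds max |x| = 2.0, so
// scaling_factors[0] is roughly 2.0 / 127 and the quantized values come out
// near {-127, 32, 64}.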
// Check if all entries of a vector are zero for float.
inline bool IsZeroVector(const float* vector, int v_size) {
return PortableIsZeroVector(vector, v_size);
}
// Check if all entries of a vector are zero for int8_t.
inline bool IsZeroVector(const int8_t* vector, int v_size) {
return PortableIsZeroVector(vector, v_size);
}
// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized
// vector.
// Parameters:
// - input: batch vector of size n_batch * n_input; 16 bit.
// - layer_norm_weights: the quantized layer normalization weights.
// - bias: the bias for the layer normalization.
// - layer_norm_scale_a: multiplier for scale factor.
// - layer_norm_scale_b: shift for scale factor.
// - variance_limit: the guard to make sure the inverse does not overflow.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
inline void ApplyLayerNorm(const int16_t* input,
const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output) {
PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
layer_norm_scale_b, variance_limit, n_batch, n_input,
output);
}
// Same as above but the internal calculation is done in float.
inline void ApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, const int32_t* bias,
int n_batch, int n_input, int16_t* output) {
PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
layer_norm_scale_b, bias, n_batch, n_input,
output);
}
// Apply Sigmoid to a quantized vector.
// Parameters:
// - input: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
// The input is in Q3.12 format and the output is in Q0.15 format.
inline void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
int16_t* output) {
PortableApplySigmoid(input, n_batch, n_input, output);
}
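// Worked example (illustrative): in Q3.12 an input of 4096 represents 1.0, and
// sigmoid(1.0) ~= 0.731, which in the Q0.15 output corresponds to roughly
// 0.731 * 32768 ~= 23956 (up to the internal table-lookup rounding).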
// Same as above but the internal calculation is done in float.
inline void ApplySigmoidFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output) {
PortableApplySigmoidFloat(input, n_batch, n_input, output);
}
// Apply Tanh to a quantized vector.
// Parameters:
// - integer_bits: the integer bits of the input.
// Currently supports 0, 1, 2, 3, 4, 5, 6.
// - input: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output
// The input is in Qm.15-m format and the output is in Q0.15 format.
inline void ApplyTanh(int32_t integer_bits, const int16_t* input,
int32_t n_batch, int32_t n_input, int16_t* output) {
PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
}
// Apply Tanh to a quantized vector. The internal calculation is done in float.
// - Input has 2^(integer_bits) as scale.
// - Output has Q0.15 as scale.
inline void ApplyTanhFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int32_t integer_bits,
int16_t* output) {
PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
}
// Element-wise multiplication of two quantized vectors.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - shift: the shift needed to produce the output.
// - output: the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
inline void CwiseMul(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int shift, int16_t* output) {
PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
}
// Element-wise multiplication of two quantized vectors with rescaling.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - multiplier: the multiplier part of scale.
// - shift: the shift part of scale.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 8 bit output of size n_batch * n_input.
// - output_zp: the zero point of output.
// Output does not need to be initialized.
// Multiplier ("m") and shift ("s") are connected to scale ("s") with s = m *
// 2^(s - 31).
inline void CwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output) {
PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
output_zp, output);
}
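// Worked example (illustrative): to realize an output scale of 0.5, one can pick
// m = 2^30 and s = 0, since m * 2^(s - 31) = 2^30 * 2^-31 = 0.5.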
// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
// int8_t. Parameters:
// - vector: vector of size v_size.
// - v_size: the size of the vector.
// - clipping_value: the value used for clipping.
inline void CwiseClipping(float* vector, const int v_size,
const float clipping_value) {
PortableCwiseClipping(vector, v_size, clipping_value);
}
inline void CwiseClipping(int16_t* vector, const int v_size,
const int16_t clipping_value) {
PortableCwiseClipping(vector, v_size, clipping_value);
}
inline void CwiseClipping(int8_t* vector, const int v_size,
const int8_t clipping_value) {
PortableCwiseClipping(vector, v_size, clipping_value);
}
// Element-wise saturating addition of two quantized vectors without rescaling.
// Parameters:
// - input_1: batch vector of size n_batch * n_input; 16 bit.
// - input_2: batch vector of size n_batch * n_input; 16 bit.
// - n_batch: the number of batches.
// - n_input: the size for input and output.
// - output: the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
inline void CwiseAdd(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int16_t* output) {
PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
}
inline void MeanStddevNormalization(const float* input_vector,
float* output_vector, int v_size,
int n_batch) {
PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
}
inline void Sub1Vector(const float* vector, int v_size, float* result) {
PortableSub1Vector(vector, v_size, result);
}
inline void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
PortableSub1Vector(vector, v_size, result);
}
// Multiply all elements of vector with a scalar.
inline void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
float* result) {
PortableVectorScalarMultiply(vector, v_size, scale, result);
}
// Saturate Add with rescale on both inputs.
inline void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b,
int32_t n_batch, int32_t n_cell,
int16_t* output) {
PortableTwoGateSaturatingAdd(
input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
input_effective_scale_b, recurrent_effective_scale_a,
recurrent_effective_scale_b, n_batch, n_cell, output);
}
// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
// dimension composed by input vectors independent from each other). The result
// of the multiplication is accumulated to the passed result buffer.
// More specifically, for a matrix M of shape [n, i] and a batched-vector
// of shape [i, batch] it will first compute the product of shape [n, batch].
// This product will be accumulated to the result buffer.
inline void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
int m_cols, const float* vector,
int n_batch, float* result) {
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
n_batch, result);
}
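// Hedged usage sketch (the helper and values are made up): for a 2x3 matrix
// and a single batch, the call below accumulates M * v on top of whatever is
// already in `result`, so `result` must hold valid values (e.g. a bias or
// zeros) beforehand.
inline void ExampleMatrixBatchVectorMultiplyAccumulate() {
  const float matrix[2 * 3] = {1, 2, 3, 4, 5, 6};  // shape [m_rows=2, m_cols=3]
  const float vector[3] = {1, 0, 1};               // shape [m_cols=3, n_batch=1]
  float result[2] = {0.5f, -0.5f};                 // shape [m_rows=2, n_batch=1]
  MatrixBatchVectorMultiplyAccumulate(matrix, /*m_rows=*/2, /*m_cols=*/3,
                                      vector, /*n_batch=*/1, result);
  // result now holds {0.5f + 4.0f, -0.5f + 10.0f} == {4.5f, 9.5f}.
}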
// Hybrid MatrixBatchVectorMultiplyAccumulate: the matrix and vectors are
// quantized to 8 bits, and each batch provides a scaling factor that is
// applied when accumulating into the float result.
inline void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vector, const float* scaling_factors,
int n_batch, float* __restrict__ result) {
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
scaling_factors, n_batch, result);
}
inline void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result, const float* per_channel_scale,
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
bool* compute_row_sums, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulate(
matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
context);
}
inline void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vector, const float* scaling_factors,
int n_batch, int32_t* scratch, float* __restrict__ result,
CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
scaling_factors, n_batch, result);
}
// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x4.
// This function assumes that m_cols is a multiple of the block size (4 in this
// case) so that there's no incomplete block.
inline void SparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
}
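// Hedged encoding sketch (the helper and data are made up, and the layout
// below assumes the CSR-style convention used by the portable implementation:
// `segments` holds m_rows + 1 offsets into `indices`, `indices` holds the 1x4
// block-column indices, and `matrix` stores only the non-zero blocks
// contiguously in row-major order). The dense 2x8 matrix here has one non-zero
// block in row 0 (block column 1) and two in row 1 (block columns 0 and 1).
inline void ExampleSparse1x4Encoding() {
  const float matrix[3 * 4] = {1, 2,  3,  4,    // row 0, block column 1
                               5, 6,  7,  8,    // row 1, block column 0
                               9, 10, 11, 12};  // row 1, block column 1
  const int32_t segments[3] = {0, 1, 3};  // row r owns indices[segments[r]..segments[r+1])
  const int32_t indices[3] = {1, 0, 1};
  const float vector[8] = {1, 1, 1, 1, 1, 1, 1, 1};
  float result[2] = {0.0f, 0.0f};
  SparseMatrixBatchVectorMultiplyAccumulate1x4(matrix, segments, indices,
                                               /*m_rows=*/2, /*m_cols=*/8,
                                               vector, /*n_batch=*/1, result);
  // result[0] == 1 + 2 + 3 + 4 == 10; result[1] == 5 + 6 + ... + 12 == 68.
}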
// Same as SparseMatrixBatchVectorMultiplyAccumulate1x4 above, but the matrix
// is stored in block compressed sparse row format with block pattern 1x16,
// which consists of two arrays:
// 1. A matrix array stores non-zero blocks of the matrix in row major.
// 2. A ledger array stores nrows groups, one group per row. Each group starts
// with an integer representing the number of non-zero blocks for the
// corresponding row and follows with column indexes of the first element
// of each non-zero block.
// This function assumes that
// 1. m_cols is a multiple of 16 so that all blocks are full blocks.
// 2. m_cols < 254 * 16 so that block index can be represented by uint8.
inline void SparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result) {
PortableSparseMatrixBatchVectorMultiplyAccumulate(
matrix, ledger, m_rows, m_cols, vector, n_batch, result);
}
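// Hedged encoding sketch (the helper and data are made up; the layout follows
// the ledger format described above). For a 2x32 matrix in which row 0 has a
// single non-zero 1x16 block at block column 1 and row 1 is entirely zero, the
// ledger is {1, 1, 0}: row 0 declares one block at block column 1, row 1
// declares zero blocks, and `matrix` stores only the 16 values of that block.
inline void ExampleSparseLedgerEncoding() {
  float matrix[16];
  for (int i = 0; i < 16; ++i) matrix[i] = 1.0f;  // the single non-zero block
  const uint8_t ledger[3] = {1, 1, 0};
  float vector[32];
  for (int i = 0; i < 32; ++i) vector[i] = 2.0f;
  float result[2] = {0.0f, 0.0f};
  SparseMatrixBatchVectorMultiplyAccumulate(matrix, ledger, /*m_rows=*/2,
                                            /*m_cols=*/32, vector,
                                            /*n_batch=*/1, result);
  // result[0] == 16 * 1.0f * 2.0f == 32.0f; result[1] stays 0.0f.
}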
// Quantized (8 bit) sparse matmul with block pattern 1x16, using the same
// segments/indices encoding as SparseMatrixBatchVectorMultiplyAccumulate1x4
// above.
// This function assumes that m_cols is a multiple of the block size (16 in this
// case) so that there's no incomplete block. Also, it assumes all offsets of
// input, output and filter are zero.
inline void SparseMatrixBatchVectorMultiplyAccumulate1x16(
const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
int n_batch, const int32_t input_offset, const int32_t output_multiplier,
const int32_t output_shift, const int32_t output_offset,
const int32_t output_activation_min, const int32_t output_activation_max,
int8_t* __restrict__ result) {
PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch,
input_offset, output_multiplier, output_shift, output_offset,
output_activation_min, output_activation_max, result);
}
// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16 which consists of two arrays:
// 1. A matrix array stores non-zero blocks of the matrix in row major.
// 2. A ledger array stores nrows groups, one group per row. Each group starts
// with an integer representing the number of non-zero blocks for the
// corresponding row followed by column index of the first element of
// each non-zero block.
// This function assumes that
// 1. m_cols is a multiple of 16 so that all blocks are full blocks.
// 2. m_cols < 254 * 16 so that block index can be represented by uint8.
inline void SparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
const int m_cols, const int8_t* __restrict__ vectors,
const float* scaling_factors, int n_batch, float* __restrict__ result) {
PortableSparseMatrixBatchVectorMultiplyAccumulate(
matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
result);
}
// Quantized batched matmul with bias: multiplies the 8 bit input by the 8 bit
// weights, adds the 32 bit bias, rescales the sum with the given multiplier,
// shift and output zero point, and accumulates into the saturated 16 bit
// output.
// TODO(b/148688698): remove this function by folding zero point calculation in
// prepare() function.
inline void MatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int16_t* output, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulate(
input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
n_output, output_zp, scratch, output, context);
}
// Same as above, but the output is 8 bit instead of 16 bit.
// Used in projection when the hidden state is 16 bit.
inline void MatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int8_t* output, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulate(
input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
n_output, output_zp, scratch, output, context);
}
// Same as the function above, but provides a separate scaling factor for the
// matrix and for the vectors. The two factors are multiplied together into the
// scaling_factor_scratch buffer before the accumulation.
inline void MatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float matrix_scaling_factor,
const float* vector_scaling_factors, int n_batch,
float* __restrict__ result, const float* per_channel_scale,
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
bool* compute_row_sums, float* scaling_factor_scratch,
CpuBackendContext* context) {
for (int b = 0; b < n_batch; ++b) {
scaling_factor_scratch[b] =
vector_scaling_factors[b] * matrix_scaling_factor;
}
MatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vectors,
scaling_factor_scratch, n_batch, result,
per_channel_scale, input_offset, scratch,
row_sums, compute_row_sums, context);
}
// Multiplies a matrix with a scalar and reduce the result on each row to a
// scalar.
// Parameters:
// - matrix: matrix of size n_row * n_col
// - scalar: the scalar that is multiplied to each element in the matrix
// - n_row: the row count of the matrix
// - n_col: the column count of the matrix
// - output: the 32bit output
// Note: saturation is not needed because each int8 * int8 product has
// magnitude at most 2^14, so about 2^31 / 2^14 = 131072 such products fit in
// an int32 accumulator before it can overflow, which comfortably exceeds the
// number of values accumulated here, provided the non-zero initial output
// values are not exceptionally large.
inline void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
int32_t n_row, int32_t n_col,
int32_t* output) {
PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
}
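// Hedged arithmetic check of the bound referenced in the note above: each
// int8 * int8 product has magnitude at most 128 * 128 = 2^14, so an int32
// accumulator has room for roughly 2^31 / 2^14 = 131072 such products before
// it can overflow.
static_assert((1LL << 31) / (1 << 14) == 131072,
              "int32 headroom for accumulating int8 * int8 products");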
// Same as the 8 bit integer MatrixBatchVectorMultiplyAccumulate above, except
// that it takes the input zero point and does not accumulate into the output.
// TODO(b/148688698): remove this function by folding zero point calculation in
// prepare() function.
inline void MatrixBatchVectorMultiply(const int8_t* input,
int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input,
int32_t n_cell, int8_t* gate_output,
int8_t gate_output_zp) {
PortableMatrixBatchVectorMultiply(
input, input_zeropoint, input_to_gate_weights,
input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
n_input, n_cell, gate_output, gate_output_zp);
}
// Same as above, but takes the 16 bit hidden state and 8 bit weights as inputs
// and produces an 8 bit output. Used in projection when the hidden state is
// 16 bit.
inline void MatrixBatchVectorMultiply(const int16_t* hidden,
const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a,
int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch,
int32_t n_hidden, int32_t n_output,
int32_t output_zp, int8_t* proj_output) {
PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
proj_effective_scale_a,
proj_effective_scale_b, gate_bias, n_batch,
n_hidden, n_output, output_zp, proj_output);
}
// Cwise product and accumulate of a vector and a batch-vector. Since it's a
// MAC operation, the assumption here is that the result array is initialized
// to valid values.
template <typename T>
inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size,
const T* batch_vector,
int n_batch, T* result) {
for (int b = 0; b < n_batch; b++) {
VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
// Update the pointers.
result += v_size;
batch_vector += v_size;
}
}
// Same as above, but inputs are 16bit integer and output is 16bit integer.
inline void VectorBatchVectorCwiseProductAccumulate(
const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
int32_t multiplier, int shift, int16_t* result) {
PortableVectorBatchVectorCwiseProductAccumulate(
vector, v_size, batch_vector, n_batch, multiplier, shift, result);
}
// Apply Rectified Linear to elements of a vector.
inline void ApplyReluToVector(const float* vector, int v_size, float* result) {
for (int v = 0; v < v_size; v++) {
result[v] = std::max(0.0f, vector[v]);
}
}
// Apply Rectified Linear 1 (cap to [-1;1]) to elements of a vector
inline void ApplyRelu1ToVector(const float* vector, int v_size, float* result) {
for (int v = 0; v < v_size; v++) {
result[v] = std::max(-1.0f, std::min(vector[v], 1.0f));
}
}
// Apply Rectified Linear 6 (cap to [0;6]) to elements of a vector
inline void ApplyRelu6ToVector(const float* vector, int v_size, float* result) {
for (int v = 0; v < v_size; v++) {
result[v] = std::max(0.0f, std::min(vector[v], 6.0f));
}
}
// Apply tanh to elements of a vector
inline void ApplyTanhToVector(const float* vector, int v_size, float* result) {
for (int v = 0; v < v_size; v++) {
result[v] = std::tanh(vector[v]);
}
}
// Apply signbit to elements of a vector
inline void ApplySignbitToVector(const float* vector, int v_size,
float* result) {
for (int v = 0; v < v_size; v++) {
result[v] = std::signbit(vector[v]);
}
}
// Apply sigmoid to elements of a vector.
inline void ApplySigmoidToVector(const float* vector, int v_size,
float* result) {
for (int v = 0; v < v_size; v++) {
result[v] = 1.0f / (1.0f + std::exp(-vector[v]));
}
}
// Apply appropriate activation function to elements of a vector.
inline void ApplyActivationToVector(const float* vector, int v_size,
TfLiteFusedActivation activation,
float* result) {
switch (activation) {
case kTfLiteActNone:
return;
case kTfLiteActRelu:
return ApplyReluToVector(vector, v_size, result);
case kTfLiteActReluN1To1:
return ApplyRelu1ToVector(vector, v_size, result);
case kTfLiteActRelu6:
return ApplyRelu6ToVector(vector, v_size, result);
case kTfLiteActTanh:
return ApplyTanhToVector(vector, v_size, result);
case kTfLiteActSignBit:
return ApplySignbitToVector(vector, v_size, result);
case kTfLiteActSigmoid:
return ApplySigmoidToVector(vector, v_size, result);
}
}
} // namespace micro_tensor_utils
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_

View File

@@ -1,40 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
namespace tflite {
namespace ops {
namespace micro {
// Same as gtl::Greater but defined here to reduce dependencies and
// binary size for micro environment.
struct Greater {
template <typename T>
bool operator()(const T& x, const T& y) const {
return x > y;
}
};
struct Less {
template <typename T>
bool operator()(const T& x, const T& y) const {
return x < y;
}
};
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_

View File

@@ -1,215 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
struct OpDataMirrorPad {
int input_dims;
int output_size;
int offset;
int output_dims_num_elements_buffer_index;
int input_dims_num_elements_buffer_index;
};
// Helper method that fills the left and right pads.
template <typename T>
inline void GetPadding(const T* data, int offset, int64_t* left_pad,
int64_t* right_pad) {
*left_pad = static_cast<int64_t>(*(data + offset * 2));
*right_pad = static_cast<int64_t>(*(data + offset * 2 + 1));
}
// Given a dimension index in the padded output and the left/right padding,
// returns the corresponding dimension index in the input array.
inline int GetInputDimension(int padded_dimension, int left_pad, int right_pad,
int input_dim_size, int offset) {
if (padded_dimension < left_pad) {
const int original_ind = left_pad + offset - 1;
return original_ind - (std::min(padded_dimension, original_ind - offset));
}
padded_dimension -= left_pad;
if (padded_dimension >= input_dim_size) {
padded_dimension -= input_dim_size;
const int original_ind = input_dim_size - (1 + offset);
return original_ind - std::min(padded_dimension, original_ind);
}
return padded_dimension;
}
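// Hedged worked example (the helper below is illustrative only and not part of
// the original kernel): for a 1-D input of size 3 padded by 2 on each side in
// REFLECT mode (offset == 1), padded positions 0..6 map back to input indices
// {2, 1, 0, 1, 2, 1, 0}, i.e. input [a b c] becomes [c b a b c b a]. With
// SYMMETRIC mode (offset == 0) the same padding would yield [b a a b c c b].
inline void CheckGetInputDimensionReflectExample() {
  constexpr int kExpected[7] = {2, 1, 0, 1, 2, 1, 0};
  for (int i = 0; i < 7; ++i) {
    TFLITE_DCHECK(GetInputDimension(i, /*left_pad=*/2, /*right_pad=*/2,
                                    /*input_dim_size=*/3, /*offset=*/1) ==
                  kExpected[i]);
  }
}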
// Given an index in the output array, returns the index of the corresponding
// value in the input array.
int GetFlatIndex(int index, int num_dims,
const TfLiteEvalTensor* padding_matrix,
const TfLiteIntArray* input_dims,
int* output_dims_num_elements, int* input_dims_num_elements,
const int offset) {
int flat_index = 0;
int64_t left_pad = 0, right_pad = 0, dimension_index, index_in_input;
for (int i = 0; i < num_dims; ++i) {
switch (padding_matrix->type) {
case kTfLiteInt32:
GetPadding(padding_matrix->data.i32, i, &left_pad, &right_pad);
break;
case kTfLiteInt64:
GetPadding(padding_matrix->data.i64, i, &left_pad, &right_pad);
break;
default:
break;
}
dimension_index = index / output_dims_num_elements[i];
index_in_input = GetInputDimension(dimension_index, left_pad, right_pad,
input_dims->data[i], offset);
flat_index += index_in_input * (input_dims_num_elements)[i];
index %= output_dims_num_elements[i];
}
return flat_index;
}
template <typename T>
void MirrorPad(const TfLiteEvalTensor* padding_matrix,
const TfLiteIntArray* input_dims, int* output_dims_num_elements,
int* input_dims_num_elements, const T* input_data,
T* output_data, const int offset, const int num_dims,
const int output_size) {
for (int i = 0; i < output_size; ++i) {
output_data[i] = input_data[GetFlatIndex(
i, num_dims, padding_matrix, input_dims, output_dims_num_elements,
input_dims_num_elements, offset)];
}
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TfLiteStatus status = kTfLiteOk;
const OpDataMirrorPad* data =
static_cast<const OpDataMirrorPad*>(node->user_data);
const TfLiteEvalTensor* input_tensor =
tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* padding_matrix =
tflite::micro::GetEvalInput(context, node, 1);
TfLiteEvalTensor* output_tensor =
tflite::micro::GetEvalOutput(context, node, 0);
const int input_dims = data->input_dims;
const int output_size = data->output_size;
int* input_dims_num_elements = (int*)context->GetScratchBuffer(
context, data->input_dims_num_elements_buffer_index);
int* output_dims_num_elements = (int*)context->GetScratchBuffer(
context, data->output_dims_num_elements_buffer_index);
for (int i = 0; i < input_dims; i++) {
output_dims_num_elements[i] = 1;
input_dims_num_elements[i] = 1;
}
for (int i = input_dims - 2; i >= 0; i--) {
output_dims_num_elements[i] =
output_dims_num_elements[i + 1] * output_tensor->dims->data[i + 1];
input_dims_num_elements[i] =
input_dims_num_elements[i + 1] * input_tensor->dims->data[i + 1];
}
switch (output_tensor->type) {
case kTfLiteFloat32: {
MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
input_dims_num_elements,
tflite::micro::GetTensorData<float>(input_tensor),
tflite::micro::GetTensorData<float>(output_tensor),
data->offset, input_dims, output_size);
break;
}
case kTfLiteInt8: {
MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
input_dims_num_elements,
tflite::micro::GetTensorData<int8_t>(input_tensor),
tflite::micro::GetTensorData<int8_t>(output_tensor),
data->offset, input_dims, output_size);
break;
}
default:
status = kTfLiteError;
break;
}
#undef TF_LITE_MIRROR_PAD
return status;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataMirrorPad));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataMirrorPad* data = static_cast<OpDataMirrorPad*>(node->user_data);
TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0);
TfLiteTensor* padding_matrix =
micro_context->AllocateTempInputTensor(node, 1);
TfLiteTensor* output_tensor =
micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE_EQ(context, NumDimensions(padding_matrix), 2);
TF_LITE_ENSURE_EQ(context, SizeOfDimension(padding_matrix, 0),
NumDimensions(input_tensor));
auto* params =
reinterpret_cast<TfLiteMirrorPaddingParams*>(node->builtin_data);
if (params == nullptr) {
return kTfLiteError;
}
data->offset =
params->mode != TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect ? 0
: 1;
data->input_dims = NumDimensions(input_tensor);
data->output_size = NumElements(output_tensor);
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, data->input_dims * sizeof(int),
&data->output_dims_num_elements_buffer_index));
TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
context, data->input_dims * sizeof(int),
&data->input_dims_num_elements_buffer_index));
micro_context->DeallocateTempTfLiteTensor(input_tensor);
micro_context->DeallocateTempTfLiteTensor(padding_matrix);
micro_context->DeallocateTempTfLiteTensor(output_tensor);
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_MIRROR_PAD() {
return tflite::micro::RegisterOp(Init, Prepare, Eval);
}
} // namespace tflite

View File

@@ -1,67 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/mul.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
switch (input1->type) {
case kTfLiteInt8:
case kTfLiteInt32:
EvalMulQuantizedReference(context, node, data, input1, input2, output);
break;
case kTfLiteFloat32:
EvalMulFloatReference(context, node, params, data, input1, input2,
output);
break;
default:
MicroPrintf("Type %s (%d) not supported.",
TfLiteTypeGetName(input1->type), input1->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteRegistration Register_MUL() {
return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval);
}
} // namespace tflite

View File

@@ -1,184 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/mul.h"
#include "tensorflow/lite/micro/memory_helpers.h"
namespace tflite {
const int kMulInput1Tensor = 0;
const int kMulInput2Tensor = 1;
const int kMulOutputTensor = 0;
void* MulInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataMul));
}
TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, OpDataMul* data) {
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kMulInput1Tensor);
TF_LITE_ENSURE(context, input1 != nullptr);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kMulInput2Tensor);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kMulOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
double real_multiplier = static_cast<double>(input1->params.scale) *
static_cast<double>(input2->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
} else if (output->type == kTfLiteInt32) {
CalculateActivationRange(params->activation, &data->output_activation_min,
&data->output_activation_max);
} else {
CalculateActivationRange(params->activation,
&data->output_activation_min_f32,
&data->output_activation_max_f32);
}
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
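// Hedged numeric sketch (the helper and scales are hypothetical, not part of
// the kernel): with input1 scale 0.02, input2 scale 0.5 and output scale 0.1,
// the real multiplier computed above is 0.02 * 0.5 / 0.1 == 0.1, which
// QuantizeMultiplier encodes as a fixed point multiplier m in [2^30, 2^31) and
// a shift s such that m * 2^(s - 31) ~= 0.1 (roughly m == 1717986918, s == -3).
inline void ExampleMulOutputMultiplier() {
  int32_t output_multiplier = 0;
  int output_shift = 0;
  QuantizeMultiplier(0.02 * 0.5 / 0.1, &output_multiplier, &output_shift);
  TFLITE_DCHECK(output_shift == -3);
}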
TfLiteStatus MulPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataMul* data = static_cast<OpDataMul*>(node->user_data);
return CalculateOpDataMul(context, node, params, data);
}
void EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node,
const OpDataMul* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.quantized_activation_min = data->output_activation_min;
op_params.quantized_activation_max = data->output_activation_max;
op_params.float_activation_max = data->output_activation_max_f32;
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (input1->type == kTfLiteInt8) {
if (need_broadcast) {
reference_integer_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
reference_integer_ops::Mul(op_params,
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else if (input1->type == kTfLiteInt32) {
if (need_broadcast) {
reference_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int32_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int32_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} else {
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int32_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int32_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
}
}
}
void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node,
TfLiteMulParams* params, const OpDataMul* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
op_params.float_activation_min = data->output_activation_min_f32;
op_params.float_activation_max = data->output_activation_max_f32;
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
reference_ops::BroadcastMul4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
} // namespace tflite

View File

@@ -1,170 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"
namespace tflite {
const int kPoolingInputTensor = 0;
const int kPoolingOutputTensor = 0;
TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
const TfLitePoolParams* params,
const TfLiteTensor* input,
const TfLiteTensor* output,
OpDataPooling* data) {
// input: batch, height, width, channel
int height = SizeOfDimension(input, 1);
int width = SizeOfDimension(input, 2);
int out_height, out_width;
data->padding = ComputePaddingHeightWidth(
params->stride_height, params->stride_width,
/*dilation_rate_height=*/1,
/*dilation_rate_width=*/1, height, width, params->filter_height,
params->filter_width, params->padding, &out_height, &out_width);
return kTfLiteOk;
}
TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kPoolingInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
CalculateOpDataPooling(context, params, input, output, data));
if (input->type == kTfLiteFloat32) {
CalculateActivationRange(params->activation, &data->activation_min_f32,
&data->activation_max_f32);
} else if (input->type == kTfLiteInt8) {
CalculateActivationRangeQuantized(context, params->activation, output,
&data->activation_min,
&data->activation_max);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
void AveragePoolingEvalFloat(const TfLiteContext* context,
const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
const TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
TFLITE_DCHECK(input->type == kTfLiteInt8);
PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
reference_integer_ops::AveragePool(
op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params, const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.float_activation_min = data->activation_min_f32;
op_params.float_activation_max = data->activation_max_f32;
reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLitePoolParams* params,
const OpDataPooling* data,
const TfLiteEvalTensor* input,
TfLiteEvalTensor* output) {
tflite::PoolParams op_params;
op_params.stride_height = params->stride_height;
op_params.stride_width = params->stride_width;
op_params.filter_height = params->filter_height;
op_params.filter_width = params->filter_width;
op_params.padding_values.height = data->padding.height;
op_params.padding_values.width = data->padding.width;
op_params.quantized_activation_min = data->activation_min;
op_params.quantized_activation_max = data->activation_max;
reference_integer_ops::MaxPool(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} // namespace tflite

View File

@@ -1,39 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
const TfLiteTensor* alpha,
TfLiteTensor* output, PreluParams* params);
void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape,
const float* input1_data,
const RuntimeShape& unextended_input2_shape,
const float* input2_data,
const RuntimeShape& unextended_output_shape,
float* output_data);
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_

View File

@@ -1,105 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/prelu.h"
namespace tflite {
TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
const TfLiteTensor* alpha,
TfLiteTensor* output, PreluParams* params) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
double real_multiplier_1 = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
double real_multiplier_2 = static_cast<double>(input->params.scale) *
static_cast<double>(alpha->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
&params->output_shift_1);
QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
&params->output_shift_2);
params->input_offset = -input->params.zero_point;
params->alpha_offset = -alpha->params.zero_point;
params->output_offset = output->params.zero_point;
}
return kTfLiteOk;
}
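// Hedged numeric sketch (the helper and scales are hypothetical, not part of
// the kernel): with input scale 0.1, alpha scale 0.05 and output scale 0.2,
// the identity path above uses real_multiplier_1 == 0.1 / 0.2 == 0.5 and the
// negative path uses real_multiplier_2 == 0.1 * 0.05 / 0.2 == 0.025, each
// encoded by QuantizeMultiplier as a fixed point multiplier plus shift.
inline void ExamplePreluMultipliers() {
  int32_t m1 = 0, m2 = 0;
  int s1 = 0, s2 = 0;
  QuantizeMultiplier(0.1 / 0.2, &m1, &s1);         // ~0.5:   m1 == 1 << 30, s1 == 0
  QuantizeMultiplier(0.1 * 0.05 / 0.2, &m2, &s2);  // ~0.025: s2 == -5
}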
void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape,
const float* input1_data,
const RuntimeShape& unextended_input2_shape,
const float* input2_data,
const RuntimeShape& unextended_output_shape,
float* output_data) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
for (int b = 0; b < output_shape.Dims(0); ++b) {
for (int y = 0; y < output_shape.Dims(1); ++y) {
for (int x = 0; x < output_shape.Dims(2); ++x) {
for (int c = 0; c < output_shape.Dims(3); ++c) {
auto out_idx = Offset(output_shape, b, y, x, c);
auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
auto in1_val = input1_data[in1_idx];
auto in2_val = input2_data[in2_idx];
output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
}
}
}
}
}
TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
PreluParams* params = static_cast<PreluParams*>(node->user_data);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* alpha = micro_context->AllocateTempInputTensor(node, 1);
TF_LITE_ENSURE(context, alpha != nullptr);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_OK(context,
CalculatePreluParams(input, alpha, output, params));
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(alpha);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,41 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context,
sizeof(OpDataQuantizeReference));
}
} // namespace
TfLiteRegistration Register_QUANTIZE() {
return tflite::micro::RegisterOp(Init, PrepareQuantizeReference,
EvalQuantizeReference);
}
} // namespace tflite

View File

@@ -1,37 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
struct OpDataQuantizeReference {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t requantize_output_multiplier;
int requantize_output_shift;
int32_t input_zero_point;
};
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus PrepareQuantizeReference(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_

View File

@@ -1,239 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <limits>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
// TODO(b/128934713): Add support for fixed-point per-channel quantization.
// Currently this only support affine per-layer quantization.
TF_LITE_ENSURE_EQ(context, output->quantization.type,
kTfLiteAffineQuantization);
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
TF_LITE_ENSURE(context, affine_quantization);
TF_LITE_ENSURE(context, affine_quantization->scale);
TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
TF_LITE_ENSURE(
context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt32 ||
input->type == kTfLiteInt16 || input->type == kTfLiteInt8 ||
input->type == kTfLiteUInt8);
TF_LITE_ENSURE(context, output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16 ||
output->type == kTfLiteInt32 ||
output->type == kTfLiteUInt8);
if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteUInt8) ||
(input->type == kTfLiteUInt8 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt16) ||
(input->type == kTfLiteInt8 && output->type == kTfLiteInt32) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt16) ||
(input->type == kTfLiteInt16 && output->type == kTfLiteInt32) ||
(input->type == kTfLiteInt32 && output->type == kTfLiteInt8) ||
(input->type == kTfLiteInt32 && output->type == kTfLiteInt16)) {
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &data->requantize_output_multiplier,
&data->requantize_output_shift);
}
data->quantization_params.zero_point = output->params.zero_point;
data->quantization_params.scale = static_cast<double>(output->params.scale);
data->input_zero_point = input->params.zero_point;
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
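// Hedged numeric sketch (the helper, scales and zero points are hypothetical):
// requantizing int8 data with scale 0.5 / zero point 0 to int8 with scale 0.25
// / zero point -10 uses effective_scale == 0.5 / 0.25 == 2.0, encoded as
// multiplier 1 << 30 with shift 2 (2.0 == (1 << 30) * 2^(2 - 31)).
inline void ExampleRequantizeInt8() {
  const int8_t input[1] = {3};  // represents (3 - 0) * 0.5 == 1.5
  int8_t output[1] = {0};
  reference_ops::Requantize(input, /*size=*/1,
                            /*effective_scale_multiplier=*/1 << 30,
                            /*effective_scale_shift=*/2,
                            /*input_zeropoint=*/0, /*output_zeropoint=*/-10,
                            output);
  // output[0] == 3 * 2 + (-10) == -4, which represents
  // (-4 - (-10)) * 0.25 == 1.5, the same real value as the input.
}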
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt32) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int32_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int32_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
break;
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
return kTfLiteOk;
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
break;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
break;
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteUInt8) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<uint8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
MicroPrintf("Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,87 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_resource_variable.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
constexpr int kInputVariableId = 0;
constexpr int kOutputValue = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(NumInputs(node) == 1);
TFLITE_DCHECK(NumOutputs(node) == 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input_resource_id_tensor =
micro_context->AllocateTempInputTensor(node, kInputVariableId);
TFLITE_DCHECK(input_resource_id_tensor != nullptr);
TFLITE_DCHECK(input_resource_id_tensor->type == kTfLiteResource);
TFLITE_DCHECK(NumElements(input_resource_id_tensor) == 1);
micro_context->DeallocateTempTfLiteTensor(input_resource_id_tensor);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input_resource_id_tensor =
tflite::micro::GetEvalInput(context, node, kInputVariableId);
TFLITE_DCHECK(input_resource_id_tensor != nullptr);
TfLiteEvalTensor* output_value =
tflite::micro::GetEvalOutput(context, node, kOutputValue);
TFLITE_DCHECK(output_value != nullptr);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
if (resources == nullptr) {
MicroPrintf(
"READ_VARIABLE requires resource variables. Please create "
"ResourceVariables and pass it to the interpreter.");
return kTfLiteError;
}
TF_LITE_ENSURE_OK(
context,
resources->Read(input_resource_id_tensor->data.i32[0], output_value));
return kTfLiteOk;
}
} // namespace.
TfLiteRegistration Register_READ_VARIABLE() {
return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
}
} // namespace tflite

View File

@@ -1,72 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/reduce.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/reduce.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
return context->AllocatePersistentBuffer(context, sizeof(OpDataReduce));
}
TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
return PrepareMaxHelper(context, node,
static_cast<OpDataReduce*>(node->user_data));
}
TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
return PrepareMeanOrSumHelper(context, node,
static_cast<OpDataReduce*>(node->user_data));
}
TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
return EvalMeanHelper(context, node,
static_cast<OpDataReduce*>(node->user_data));
}
TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) {
OpDataReduce* op_data = static_cast<OpDataReduce*>(node->user_data);
return EvalMaxHelper(context, node, op_data);
}
TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) {
return EvalSumHelper(context, node,
static_cast<OpDataReduce*>(node->user_data));
}
TfLiteRegistration Register_MEAN() {
return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalMean);
}
TfLiteRegistration Register_REDUCE_MAX() {
return tflite::micro::RegisterOp(InitReduce, PrepareMax, EvalMax);
}
TfLiteRegistration Register_SUM() {
return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalSum);
}
} // namespace tflite
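The three Register_* functions above are what an application wires into its op resolver. A sketch of that wiring, assuming the AddMean/AddReduceMax/AddSum convenience methods of MicroMutableOpResolver are available in this tree (they wrap exactly these registrations in most TFLM versions):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Sketch: make the reduce kernels above available to an interpreter.
template <unsigned int kNumOps>
void RegisterReduceOps(tflite::MicroMutableOpResolver<kNumOps>& resolver) {
  resolver.AddMean();       // wraps Register_MEAN()
  resolver.AddReduceMax();  // wraps Register_REDUCE_MAX()
  resolver.AddSum();        // wraps Register_SUM()
}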

View File

@@ -1,64 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
extern const int kMaxNumberOfAxis;
extern const int kMaxNumberOfReducedAxis;
struct OpDataReduce {
int32_t multiplier;
int shift;
int temp_buffer_idx;
int resolved_axis_idx;
int input_zp;
float input_scale;
int output_zp;
float output_scale;
int num_output_elements;
};
TfLiteStatus PrepareMaxHelper(TfLiteContext* context, TfLiteNode* node,
OpDataReduce* op_data);
TfLiteStatus PrepareMeanOrSumHelper(TfLiteContext* context, TfLiteNode* node,
OpDataReduce* op_data);
TfLiteStatus EvalMaxHelper(TfLiteContext* context, TfLiteNode* node,
OpDataReduce* op_data);
TfLiteStatus EvalMeanHelper(TfLiteContext* context, TfLiteNode* node,
OpDataReduce* op_data);
TfLiteStatus EvalSumHelper(TfLiteContext* context, TfLiteNode* node,
OpDataReduce* op_data);
void ReduceResolveAxis(const int* axis_data, int axis_count,
MeanParams* op_params);
TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_REDUCE_MAX();
TfLiteRegistration Register_SUM();
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_

View File

@@ -1,118 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace reshape {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
// TensorFlow's Reshape allows one of the shape components to have the
// special -1 value, meaning it will be calculated automatically based on the
// input. Here we calculate what that dimension should be so that the number
// of output elements is the same as the number of input elements.
int num_input_elements = NumElements(input);
TfLiteIntArray* output_shape = output->dims;
if (NumInputs(node) == 1 && // Legacy scalar supported with params.
output_shape->size == 1 && output_shape->data[0] == 0) {
// Legacy tflite models use a shape parameter of [0] to indicate scalars,
// so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during
// toco conversion.
output_shape->size = 0;
}
int num_output_elements = 1;
int stretch_dim = -1;
for (int i = 0; i < output_shape->size; ++i) {
int value = output_shape->data[i];
if (value == -1) {
TF_LITE_ENSURE_EQ(context, stretch_dim, -1);
stretch_dim = i;
} else {
num_output_elements *= value;
}
}
if (stretch_dim != -1) {
output_shape->data[stretch_dim] = num_input_elements / num_output_elements;
num_output_elements *= output_shape->data[stretch_dim];
}
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
// TODO(b/162522304): storing input bytes in OpData increases the size of some
// models significantly, possibly due to alignment issues.
size_t input_bytes;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
input_bytes *= ElementCount(*input->dims);
// Do nothing for in-place reshape.
if (input->data.raw != output->data.raw) {
// Otherwise perform reshape with copy.
memcpy(output->data.raw, input->data.raw, input_bytes);
}
return kTfLiteOk;
}
} // namespace reshape
TfLiteRegistration Register_RESHAPE() {
return tflite::micro::RegisterOp(nullptr, reshape::Prepare, reshape::Eval);
}
} // namespace micro
} // namespace ops
} // namespace tflite
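The stretch-dimension logic in ReshapeOutput is easiest to see with concrete numbers. Below is a standalone sketch of the same inference; InferStretchDim is a hypothetical helper chosen for illustration and is not part of TFLM.

#include <cassert>
#include <vector>

// Illustration only: infer the -1 ("stretch") dimension the way ReshapeOutput
// does. Example: 24 input elements reshaped to {-1, 6} resolves to {4, 6}.
std::vector<int> InferStretchDim(int num_input_elements, std::vector<int> shape) {
  int known_product = 1;
  int stretch_dim = -1;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == -1) {
      assert(stretch_dim == -1);  // at most one -1 is allowed
      stretch_dim = static_cast<int>(i);
    } else {
      known_product *= shape[i];
    }
  }
  if (stretch_dim != -1) {
    shape[stretch_dim] = num_input_elements / known_product;
  }
  return shape;
}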

View File

@@ -1,76 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/round.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace micro {
namespace round {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
for (int i = 0; i < output->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
reference_ops::Round(tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
}
} // namespace round
TfLiteRegistration Register_ROUND() {
return tflite::micro::RegisterOp(nullptr, round::Prepare, round::Eval);
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -1,69 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);
// Common helper function to SoftmaxPrepare.
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
SoftmaxParams* op_data);
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_SOFTMAX();
#if defined(XTENSA) || defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for a kernel variant that only supports
// int8 input and int16 output.
TfLiteRegistration Register_SOFTMAX_INT8_INT16();
#else
inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
return Register_SOFTMAX();
}
#endif
#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for a kernel variant that only supports
// int8 input/output and uses the latency-optimized implementations.
TfLiteRegistration Register_SOFTMAX_INT8();
// Returns a TfLiteRegistration struct for a kernel variant that only supports
// int16 input/output and uses the latency-optimized implementations.
TfLiteRegistration Register_SOFTMAX_INT16();
#else
inline TfLiteRegistration Register_SOFTMAX_INT8() { return Register_SOFTMAX(); }
inline TfLiteRegistration Register_SOFTMAX_INT16() {
return Register_SOFTMAX();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
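The #if/#else pattern above lets optimized ports expose specialized kernels while every other target silently falls back to the generic registration, so application code can request the specialized variant unconditionally. A minimal sketch of that selection, using only functions declared in this header:

#include "tensorflow/lite/micro/kernels/softmax.h"

// Sketch: prefer the int8-in/int16-out variant when the model needs it; on
// targets without CMSIS_NN or XTENSA the inline fallbacks above make this
// identical to Register_SOFTMAX().
inline TfLiteRegistration ChooseSoftmaxRegistration(bool int8_in_int16_out) {
  return int8_in_int16_out ? tflite::Register_SOFTMAX_INT8_INT16()
                           : tflite::Register_SOFTMAX();
}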

View File

@@ -1,162 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_context.h"
namespace tflite {
namespace {
// Softmax parameter data that persists in user_data
const int kInt16LUTArraySize = 513;
TfLiteStatus InitializeLutForInt16(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
SoftmaxParams* op_data) {
// Only allocate LUTs for the kTfLiteInt16 data type
if (input->type == kTfLiteInt16) {
void* raw_exp_lut = context->AllocatePersistentBuffer(
context, sizeof(int16_t) * kInt16LUTArraySize);
TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
context, sizeof(int16_t) * kInt16LUTArraySize);
TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
op_data->one_over_one_plus_x_lut =
reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
}
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE(context,
input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
} else {
TF_LITE_ENSURE_EQ(context, input->type, output->type);
}
// Populate LUT if required
if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
// The exp LUT is only used on negative values; we consider exp(-10.0)
// insignificant to the accumulation.
gen_lut<float, int16_t, int16_t>(
[](float value) { return std::exp(value); }, -10.0f, 0.0f, -1.0f, 1.0f,
op_data->exp_lut);
gen_lut<float, int16_t, int16_t>(
[](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f, -1.0f,
1.0f, op_data->one_over_one_plus_x_lut);
op_data->zero_point = output->params.zero_point;
op_data->scale = output->params.scale;
}
return kTfLiteOk;
}
} // namespace
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
SoftmaxParams* op_data) {
if (InitializeLutForInt16(context, input, output, op_data) != kTfLiteOk) {
return kTfLiteError;
}
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
(0.001f * 1.f / 32768));
} else { // input->type == kTfLiteInt8
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
(0.001f * 1.f / 65536));
} else {  // output->type == kTfLiteInt8
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
}
}
static const int kScaledDiffIntegerBits = 5;
// Calculate input_multiplier and input_left_shift
if (input->type == kTfLiteInt16) {
int input_left_shift;
double input_scale_beta_rescale =
static_cast<double>(input->params.scale) *
static_cast<double>(params->beta) /
(10.0 / 65535.0);  // scale the input_diff such that [-65535, 0]
// corresponds to [-10.0, 0.0]
QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
&input_left_shift);
op_data->input_left_shift = input_left_shift;
} else {
int input_left_shift;
tflite::PreprocessSoftmaxScaling(
static_cast<double>(params->beta),
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&op_data->input_multiplier, &input_left_shift);
op_data->input_left_shift = input_left_shift;
op_data->diff_min =
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
op_data->input_left_shift);
}
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
op_data->beta = static_cast<double>(params->beta);
}
return kTfLiteOk;
}
void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
}
TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, input != nullptr);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE(context, node->user_data != nullptr);
SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
auto ret_val =
CalculateSoftmaxParams(context, input, output, params, op_data);
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(output);
return ret_val;
}
} // namespace tflite
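CalculateSoftmaxParams pins the output quantization so that the softmax range [0, 1] spans the full integer range of the output type. A small illustration of what those checked constants mean; this is not TFLM code, just the arithmetic implied by the checks above.

// int8 output:  scale = 1/256,   zero_point = -128
//   a probability p is stored as round(p * 256) - 128, so p = 0 -> -128 and
//   p = 1 -> 128 (which the kernel clamps to 127).
// int16 output with int8 input:  scale = 1/65536, zero_point = -32768.
// int16 input and output:        scale = 1/32768, zero_point = 0 (Q0.15).
constexpr int QuantizeSoftmaxOutputInt8(float p) {
  return static_cast<int>(p * 256.0f + 0.5f) - 128;  // unclamped illustration
}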

View File

@@ -1,128 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace ops {
namespace micro {
namespace split {
template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input, int axis_value) {
const int output_count = NumOutputs(node);
const TfLiteIntArray* input_dims = input->dims;
const TfLiteEvalTensor* output0 =
tflite::micro::GetEvalOutput(context, node, 0);
const TfLiteIntArray* output_dims = output0->dims;
const int split_dimensions = input_dims->size;
int axis = axis_value < 0 ? axis_value + split_dimensions : axis_value;
TFLITE_DCHECK_LT(axis, split_dimensions);
TFLITE_DCHECK_EQ(output_dims->size, split_dimensions);
int64_t split_size = output_dims->data[axis] * output_count;
TFLITE_DCHECK_EQ(split_size, input_dims->data[axis]);
int64_t outer_size = 1;
for (int i = 0; i < axis; ++i) {
outer_size *= input_dims->data[i];
}
int64_t base_inner_size = 1;
for (int i = axis + 1; i < split_dimensions; ++i) {
base_inner_size *= input_dims->data[i];
}
const T* input_ptr = tflite::micro::GetTensorData<T>(input);
for (int k = 0; k < outer_size; ++k) {
for (int i = 0; i < output_count; ++i) {
TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
T* output_data = tflite::micro::GetTensorData<T>(t);
const int copy_size = output_dims->data[axis] * base_inner_size;
T* output_ptr = output_data + k * copy_size;
for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
input_ptr += copy_size;
}
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 0);
TF_LITE_ENSURE(context, axis != nullptr);
// Dynamic output tensors are needed if the axis tensor is not constant, but
// Micro doesn't support dynamic memory allocation, so we only support a
// constant axis tensor for now.
TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
"Non constant axis tensor not supported");
micro_context->DeallocateTempTfLiteTensor(axis);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);
int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
if (axis_value < 0) {
axis_value += input->dims->size;
}
TF_LITE_ENSURE(context, axis_value >= 0);
TF_LITE_ENSURE(context, axis_value < input->dims->size);
switch (input->type) {
case kTfLiteFloat32: {
return SplitImpl<float>(context, node, input, axis_value);
}
case kTfLiteInt8: {
return SplitImpl<int8_t>(context, node, input, axis_value);
}
case kTfLiteInt16: {
return SplitImpl<int16_t>(context, node, input, axis_value);
}
case kTfLiteInt32: {
return SplitImpl<int32_t>(context, node, input, axis_value);
}
default:
MicroPrintf("Type %s currently not supported.",
TfLiteTypeGetName(input->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace split
TfLiteRegistration Register_SPLIT() {
return tflite::micro::RegisterOp(nullptr, split::Prepare, split::Eval);
}
} // namespace micro
} // namespace ops
} // namespace tflite
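The nested copy loops in SplitImpl are driven by three products of the input dimensions. Below is a concrete, self-contained example of that arithmetic; the shapes are chosen purely for illustration.

#include <cassert>

// Illustration only: input shape {2, 6, 3} split along axis = 1 into 3 outputs
// of shape {2, 2, 3}, mirroring the quantities computed in SplitImpl.
void SplitIndexingExample() {
  const int input_dims[3] = {2, 6, 3};
  const int axis = 1;
  const int output_count = 3;
  const int output_axis_dim = input_dims[axis] / output_count;  // 2

  int outer_size = 1;
  for (int i = 0; i < axis; ++i) outer_size *= input_dims[i];  // 2
  int base_inner_size = 1;
  for (int i = axis + 1; i < 3; ++i) base_inner_size *= input_dims[i];  // 3

  const int copy_size = output_axis_dim * base_inner_size;  // 6 values per copy
  // Every element is copied exactly once: 2 outer slices * 3 outputs * 6 values.
  assert(outer_size * output_count * copy_size == 2 * 6 * 3);
}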

View File

@@ -1,247 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
bool requires_broadcast;
ArithmeticParams arithmetic_params;
};
template <typename T>
T SquaredDifference(T input1, T input2) {
const T difference = input1 - input2;
return difference * difference;
}
void* SquaredDifferenceInit(TfLiteContext* context, const char* buffer,
size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context,
TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = reinterpret_cast<OpData*>(node->user_data);
data->requires_broadcast = false;
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
output->type = input2->type;
// Ensure the quantization parameters are equivalent.
if (input1->type == kTfLiteInt8) {
const auto& input1_quantization_params = input1->params;
const auto& input2_quantization_params = input2->params;
const auto& output_quantization_params = output->params;
const int32_t integer_type_min = std::numeric_limits<int8_t>::min();
const int32_t integer_type_max = std::numeric_limits<int8_t>::max();
TF_LITE_ENSURE(context,
input1_quantization_params.zero_point >= integer_type_min);
TF_LITE_ENSURE(context,
input1_quantization_params.zero_point <= integer_type_max);
TF_LITE_ENSURE(context,
input2_quantization_params.zero_point >= integer_type_min);
TF_LITE_ENSURE(context,
input2_quantization_params.zero_point <= integer_type_max);
TF_LITE_ENSURE(context,
output_quantization_params.zero_point >= integer_type_min);
TF_LITE_ENSURE(context,
output_quantization_params.zero_point <= integer_type_max);
data->arithmetic_params.input1_offset =
-input1_quantization_params.zero_point;
data->arithmetic_params.input2_offset =
-input2_quantization_params.zero_point;
data->arithmetic_params.output_offset =
output_quantization_params.zero_point;
// Inputs are left-shifted before rescaling so the fixed-point multiplies keep
// enough precision. 7 is selected so that the maximum shifted result
// 255^2 * (1 << (7 * 2)) does not overflow a signed 32-bit integer.
data->arithmetic_params.left_shift = 7;
const double twice_max_input_scale =
2.0 * static_cast<double>(std::max(input1_quantization_params.scale,
input2_quantization_params.scale));
const double real_input1_multiplier =
static_cast<double>(input1_quantization_params.scale) /
twice_max_input_scale;
double real_input2_multiplier =
static_cast<double>(input2_quantization_params.scale) /
twice_max_input_scale;
const double real_output_multiplier =
(twice_max_input_scale * twice_max_input_scale) /
static_cast<double>((1 << data->arithmetic_params.left_shift * 2) *
output_quantization_params.scale);
QuantizeMultiplierSmallerThanOneExp(
real_input1_multiplier, &data->arithmetic_params.input1_multiplier,
&data->arithmetic_params.input1_shift);
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->arithmetic_params.input2_multiplier,
&data->arithmetic_params.input2_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->arithmetic_params.output_multiplier,
&data->arithmetic_params.output_shift);
data->arithmetic_params.quantized_activation_min =
std::numeric_limits<int8_t>::min();
data->arithmetic_params.quantized_activation_max =
std::numeric_limits<int8_t>::max();
}
data->requires_broadcast = !HaveSameShapes(input1, input2);
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
inline int8_t SquaredDifference(int8_t x, int8_t y,
const ArithmeticParams& params) {
const int32_t input1_val = params.input1_offset + x;
const int32_t input2_val = params.input2_offset + y;
const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
const int32_t scaled_input1_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input1_val, params.input1_multiplier, params.input1_shift);
const int32_t scaled_input2_val =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_input2_val, params.input2_multiplier, params.input2_shift);
const int32_t raw_diff = scaled_input1_val - scaled_input2_val;
// Max of this is 255^2 * (1 << 14), so won't overflow 32 bits.
const int32_t squared_raw_diff = raw_diff * raw_diff;
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
squared_raw_diff, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
return static_cast<int8_t>(clamped_output);
}
template <typename T>
void EvalQuantizedSquaredDifference(TfLiteContext* context, TfLiteNode* node,
const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const auto* op_data = static_cast<const OpData*>(node->user_data);
if (data->requires_broadcast) {
reference_integer_ops::BroadcastBinaryFunction4DSlow(
op_data->arithmetic_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<T>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output),
reference_integer_ops::CheckArithmeticParams, SquaredDifference);
} else {
const int flat_size = tflite::micro::GetTensorShape(input1).FlatSize();
reference_integer_ops::ElementWise(
flat_size, op_data->arithmetic_params,
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorData<int8_t>(output),
reference_integer_ops::CheckArithmeticParams, SquaredDifference);
}
}
template <typename T>
void EvalSquaredDifference(TfLiteContext* context, TfLiteNode* node,
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (data->requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<T>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), SquaredDifference<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<T>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), SquaredDifference<T>);
}
}
TfLiteStatus SquaredDifferenceEval(TfLiteContext* context, TfLiteNode* node) {
OpData* data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalSquaredDifference<float>(context, node, data, input1, input2, output);
} else if (output->type == kTfLiteInt32) {
EvalSquaredDifference<int32_t>(context, node, data, input1, input2, output);
} else if (output->type == kTfLiteInt8) {
EvalQuantizedSquaredDifference<int8_t>(context, node, data, input1, input2,
output);
} else {
MicroPrintf(
"SquaredDifference only supports FLOAT32, INT32 and INT8 now, got %d.",
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_SQUARED_DIFFERENCE() {
return tflite::micro::RegisterOp(
SquaredDifferenceInit, SquaredDifferencePrepare, SquaredDifferenceEval);
}
} // namespace tflite
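The int8 fast path above chains several fixed-point multiplies; the real-valued computation it approximates is much shorter. A float-domain sketch for one element follows; SquaredDifferenceFloatRef is a hypothetical helper, not the TFLM kernel.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustration only: dequantize both operands, square their difference, and
// requantize into the output's scale and zero point.
inline int8_t SquaredDifferenceFloatRef(int8_t x, int8_t y,
                                        float in1_scale, int32_t in1_zp,
                                        float in2_scale, int32_t in2_zp,
                                        float out_scale, int32_t out_zp) {
  const float a = in1_scale * (static_cast<int32_t>(x) - in1_zp);
  const float b = in2_scale * (static_cast<int32_t>(y) - in2_zp);
  const float diff = a - b;
  const int32_t q =
      out_zp + static_cast<int32_t>(std::round(diff * diff / out_scale));
  return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, q)));
}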

View File

@@ -1,168 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/sub.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
void* SubInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataSub));
}
void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
const OpDataSub* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
float output_activation_min, output_activation_max;
CalculateActivationRange(params->activation, &output_activation_min,
&output_activation_max);
tflite::ArithmeticParams op_params;
SetActivationParams(output_activation_min, output_activation_max, &op_params);
if (data->requires_broadcast) {
tflite::reference_ops::BroadcastSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
tflite::reference_ops::SubWithActivation(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
}
TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteSubParams* params, const OpDataSub* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
tflite::reference_ops::BroadcastQuantSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
tflite::reference_ops::BroadcastQuantSubSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
tflite::reference_ops::Sub(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
}
break;
}
default:
MicroPrintf("Quantized type %s not currently supported.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kSubInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kSubInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kSubOutputTensor);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataSub& data = *(static_cast<const OpDataSub*>(node->user_data));
if (output->type == kTfLiteFloat32) {
EvalSub(context, node, params, &data, input1, input2, output);
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
input1, input2, output));
} else {
MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteRegistration Register_SUB() {
return tflite::micro::RegisterOp(SubInit, SubPrepare, SubEval);
}
} // namespace tflite

View File

@@ -1,60 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
extern const int kSubInputTensor1;
extern const int kSubInputTensor2;
extern const int kSubOutputTensor;
struct OpDataSub {
bool requires_broadcast;
// These fields are used in both the general 8-bit -> 8-bit quantized path
// and the special 16-bit -> 16-bit quantized path.
int input1_shift;
int input2_shift;
int32_t output_activation_min;
int32_t output_activation_max;
// These fields are used only in the general 8-bit -> 8-bit quantized path.
int32_t input1_multiplier;
int32_t input2_multiplier;
int32_t output_multiplier;
int output_shift;
int left_shift;
int32_t input1_offset;
int32_t input2_offset;
int32_t output_offset;
};
TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output, OpDataSub* data);
TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_

View File

@@ -1,107 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/sub.h"
namespace tflite {
const int kSubInputTensor1 = 0;
const int kSubInputTensor2 = 1;
const int kSubOutputTensor = 0;
TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output, OpDataSub* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
// The shift is set to 15 for the 16-bit case and 20 for the 8-bit case.
// For 16-bit, 65535 << 15 is less than 1 << 31, so the addition still fits
// in a 32-bit accumulator.
data->left_shift = output->type == kTfLiteInt16 ? 15 : 20;
const float twice_max_input_scale =
2 * std::max(input1->params.scale, input2->params.scale);
const double real_input1_multiplier =
static_cast<double>(input1->params.scale / twice_max_input_scale);
const double real_input2_multiplier =
static_cast<double>(input2->params.scale / twice_max_input_scale);
const double real_output_multiplier =
static_cast<double>(twice_max_input_scale /
((1 << data->left_shift) * output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
QuantizeMultiplierSmallerThanOneExp(
real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TFLITE_DCHECK(node->builtin_data != nullptr);
OpDataSub* data = static_cast<OpDataSub*>(node->user_data);
auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
MicroContext* micro_context = GetMicroContext(context);
TfLiteTensor* input1 =
micro_context->AllocateTempInputTensor(node, kSubInputTensor1);
TF_LITE_ENSURE(context, input1 != nullptr);
TfLiteTensor* input2 =
micro_context->AllocateTempInputTensor(node, kSubInputTensor2);
TF_LITE_ENSURE(context, input2 != nullptr);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kSubOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_STATUS(
CalculateOpDataSub(context, params, input1, input2, output, data));
micro_context->DeallocateTempTfLiteTensor(input1);
micro_context->DeallocateTempTfLiteTensor(input2);
micro_context->DeallocateTempTfLiteTensor(output);
return kTfLiteOk;
}
} // namespace tflite
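The multipliers set up in CalculateOpDataSub are easier to follow with concrete numbers. A worked illustration with hypothetical scales (not taken from any real model):

// Assume an int8 SUB with input1.scale = 0.02, input2.scale = 0.01,
// output.scale = 0.03, and left_shift = 20 (the 8-bit case):
//   twice_max_input_scale  = 2 * max(0.02, 0.01)        = 0.04
//   real_input1_multiplier = 0.02 / 0.04                = 0.5
//   real_input2_multiplier = 0.01 / 0.04                = 0.25
//   real_output_multiplier = 0.04 / ((1 << 20) * 0.03)  ~ 1.27e-6
// QuantizeMultiplierSmallerThanOneExp converts each real multiplier into a
// Q31 fixed-point multiplier plus a right shift, which EvalSubQuantized then
// feeds to the reference Sub / BroadcastQuantSubSlow kernels.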

View File

@@ -1,106 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/svdf.h"
#include <math.h>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataSvdf));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataSvdf& data = *(static_cast<const OpDataSvdf*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kSvdfInputTensor);
const TfLiteEvalTensor* weights_feature =
tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor);
const TfLiteEvalTensor* weights_time =
tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor);
const TfLiteEvalTensor* bias =
(NumInputs(node) == 5)
? tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor)
: nullptr;
TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
context, node, kSvdfInputActivationStateTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor);
switch (weights_feature->type) {
case kTfLiteFloat32: {
EvalFloatSvdfReference(
context, node, input, weights_feature, weights_time, bias, params,
data.scratch_tensor_index, activation_state, output);
return kTfLiteOk;
}
case kTfLiteInt8: {
switch (weights_time->type) {
case kTfLiteInt16: {
EvalInt16SvdfReference(context, node, input, weights_feature,
weights_time, bias, params, activation_state,
output, data);
return kTfLiteOk;
}
case kTfLiteInt8: {
EvalInt8SvdfReference(context, node, input, weights_feature,
weights_time, bias, params, activation_state,
output, data);
return kTfLiteOk;
}
default:
MicroPrintf("Type %s not currently supported.",
TfLiteTypeGetName(weights_time->type));
return kTfLiteError;
}
}
default:
MicroPrintf("Type %s not currently supported.",
TfLiteTypeGetName(weights_feature->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_SVDF() {
return tflite::micro::RegisterOp(Init, PrepareSvdf, Eval);
}
} // namespace tflite

View File

@@ -1,514 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <math.h>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/svdf.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
/**
* This version of SVDF is specific to TFLite Micro. It differs from the
* TFLite version in the following ways:
*
* 1.) Scratch tensor allocation - scratch tensors must be known ahead of time
* for the Micro interpreter.
* 2.) Output dimensions - the TFLite version determines the output size at
* runtime and resizes the output tensor. The Micro runtime does not support
* tensor resizing.
*/
const int kSvdfInputTensor = 0;
const int kSvdfWeightsFeatureTensor = 1;
const int kSvdfWeightsTimeTensor = 2;
const int kSvdfBiasTensor = 3;
const int kSvdfInputActivationStateTensor =
4; // This is a variable tensor, and will be modified by this op.
const int kSvdfOutputTensor = 0;
template <typename T>
void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input_tensor,
const TfLiteEvalTensor* weights_feature_tensor,
const TfLiteEvalTensor* weights_time_tensor,
const TfLiteEvalTensor* bias_tensor,
const TfLiteSVDFParams* params,
TfLiteEvalTensor* activation_state_tensor,
TfLiteEvalTensor* output_tensor,
const OpDataSvdf& data) {
const int n_rank = params->rank;
const int n_batch = input_tensor->dims->data[0];
const int n_input = input_tensor->dims->data[1];
const int n_filter = weights_feature_tensor->dims->data[0];
const int n_unit = n_filter / n_rank;
const int n_memory = weights_time_tensor->dims->data[1];
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
int32_t* scratch_tensor = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_tensor_index));
int32_t* scratch_output_tensor = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_output_tensor_index));
// Shift states.
T* const state_ptr = tflite::micro::GetTensorData<T>(activation_state_tensor);
// Left shift the activation_state.
{
T* new_state_start = state_ptr;
const T* old_state_start = state_ptr + 1;
const T* old_state_end = state_ptr + n_batch * n_filter * n_memory;
while (old_state_start != old_state_end) {
*new_state_start++ = *old_state_start++;
}
}
// Note: no need to clear the latest activation, matmul is not accumulative.
// Feature matmul.
{
T* state = tflite::micro::GetTensorData<T>(activation_state_tensor);
const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
const int8_t* weight_feature =
tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
const int32_t output_max = std::numeric_limits<T>::max();
const int32_t output_min = std::numeric_limits<T>::min();
T* result_in_batch = state + (n_memory - 1);
for (int b = 0; b < n_batch; b++) {
const int8_t* matrix_ptr = weight_feature;
for (int r = 0; r < n_filter; r++) {
int32_t dot_prod = 0;
const int8_t* vector_in_batch = input + b * n_input;
for (int c = 0; c < n_input; c++) {
dot_prod +=
*matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
}
dot_prod = MultiplyByQuantizedMultiplier(
dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
dot_prod = std::min(std::max(output_min, dot_prod), output_max);
// The int16 version of the op assumes a zero_point of 0. This
// code accounts for the potentially non-zero zero_point for the int8
// version of the op.
*result_in_batch = data.activation_state_zero_point + dot_prod;
result_in_batch += n_memory;
}
}
}
// Time.
{
for (int b = 0; b < n_batch; ++b) {
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
// Perform batched vector dot product:
const T* vector1_ptr =
tflite::micro::GetTensorData<T>(weights_time_tensor);
const T* vector2_ptr =
tflite::micro::GetTensorData<T>(activation_state_tensor) +
b * n_memory * n_filter;
for (int i = 0; i < n_filter; i++) {
*scratch_ptr_batch = 0;
for (int j = 0; j < n_memory; j++) {
*scratch_ptr_batch +=
*vector1_ptr++ *
(*vector2_ptr++ - data.activation_state_zero_point);
}
scratch_ptr_batch++;
}
}
}
// Reduce, add bias, rescale, activation.
{
// Add bias.
if (bias_tensor) {
// Vector batch assign:
const int32_t* bias_data =
tflite::micro::GetTensorData<int32_t>(bias_tensor);
for (int i = 0; i < n_batch; ++i) {
int32_t* output_ptr = scratch_output_tensor + i * n_unit;
const int32_t* bias_ptr = bias_data;
for (int j = 0; j < n_unit; ++j) {
*output_ptr++ = *bias_ptr++;
}
}
} else {
int32_t* output_ptr = scratch_output_tensor;
for (int i = 0; i < n_batch * n_unit; ++i) {
*output_ptr++ = 0;
}
}
// Reduce.
for (int b = 0; b < n_batch; ++b) {
int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
// Reduction sum vector
for (int i = 0; i < n_unit; ++i) {
for (int j = 0; j < n_rank; ++j) {
output_temp_ptr[i] += *scratch_ptr_batch++;
}
}
}
// Rescale.
const int32_t output_max = std::numeric_limits<int8_t>::max();
const int32_t output_min = std::numeric_limits<int8_t>::min();
for (int i = 0; i < n_batch * n_unit; ++i) {
int32_t x1 = scratch_output_tensor[i];
int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
data.effective_scale_2_b);
int32_t x3 = x2 + data.output_zero_point;
int32_t x4 = std::min(std::max(output_min, x3), output_max);
tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
static_cast<int8_t>(x4);
}
}
}
/**
* Generate two versions of the integer code. One with int16_t type for the
* time weights and the activation state, and another one with int8_t for the
* same.
*/
void EvalInt16SvdfReference(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input_tensor,
const TfLiteEvalTensor* weights_feature_tensor,
const TfLiteEvalTensor* weights_time_tensor,
const TfLiteEvalTensor* bias_tensor,
const TfLiteSVDFParams* params,
TfLiteEvalTensor* activation_state_tensor,
TfLiteEvalTensor* output_tensor,
const OpDataSvdf& data) {
EvalIntegerSvdfReference<int16_t>(
context, node, input_tensor, weights_feature_tensor, weights_time_tensor,
bias_tensor, params, activation_state_tensor, output_tensor, data);
}
void EvalInt8SvdfReference(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input_tensor,
const TfLiteEvalTensor* weights_feature_tensor,
const TfLiteEvalTensor* weights_time_tensor,
const TfLiteEvalTensor* bias_tensor,
const TfLiteSVDFParams* params,
TfLiteEvalTensor* activation_state_tensor,
TfLiteEvalTensor* output_tensor,
const OpDataSvdf& data) {
EvalIntegerSvdfReference<int8_t>(
context, node, input_tensor, weights_feature_tensor, weights_time_tensor,
bias_tensor, params, activation_state_tensor, output_tensor, data);
}
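// Illustration only (not part of the original file): a kernel's Eval() entry
// point would typically pick between the two wrappers above based on the type
// of the time-weights tensor. The variable names below mirror the wrapper
// parameters and are assumed to be in scope.
//
//   if (weights_time->type == kTfLiteInt16) {
//     EvalInt16SvdfReference(context, node, input, weights_feature,
//                            weights_time, bias, params, activation_state,
//                            output, data);
//   } else {
//     EvalInt8SvdfReference(context, node, input, weights_feature,
//                           weights_time, bias, params, activation_state,
//                           output, data);
//   }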
static inline void ApplyTimeWeightsBiasAndActivation(
int batch_size, int memory_size, int num_filters, int num_units, int rank,
const float* const weights_time_ptr, const float* const bias_ptr,
TfLiteFusedActivation activation, float* const state_ptr,
float* const scratch_ptr, float* const output_ptr) {
// Compute matmul(activation_state, weights_time).
for (int b = 0; b < batch_size; ++b) {
// Perform batched vector dot product:
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
const float* vector1_ptr = weights_time_ptr;
const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
for (int i = 0; i < num_filters; ++i) {
*scratch_ptr_batch = 0.f;
for (int j = 0; j < memory_size; ++j) {
*scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
}
scratch_ptr_batch++;
}
}
// Initialize output with bias if provided.
if (bias_ptr) {
// VectorBatchVectorAssign
for (int i = 0; i < batch_size; ++i) {
float* output_data = output_ptr + i * num_units;
const float* bias_data = bias_ptr;
for (int j = 0; j < num_units; ++j) {
*output_data++ = *bias_data++;
}
}
} else {
float* output_data = output_ptr;
for (int i = 0; i < batch_size * num_units; ++i) {
*output_data++ = 0.0f;
}
}
// Reduction sum.
for (int b = 0; b < batch_size; ++b) {
float* output_ptr_batch = output_ptr + b * num_units;
float* scratch_ptr_batch = scratch_ptr + b * num_filters;
// Reduction sum vector
for (int i = 0; i < num_units; ++i) {
for (int j = 0; j < rank; j++) {
output_ptr_batch[i] += *scratch_ptr_batch++;
}
}
}
// Apply activation.
for (int b = 0; b < batch_size; ++b) {
float* output_ptr_batch = output_ptr + b * num_units;
for (int i = 0; i < num_units; ++i) {
*output_ptr_batch =
tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch);
++output_ptr_batch;
}
}
}
void EvalFloatSvdfReference(
TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
const TfLiteEvalTensor* weights_feature,
const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
const TfLiteSVDFParams* params, int scratch_tensor_index,
TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
const int rank = params->rank;
const int batch_size = input->dims->data[0];
const int input_size = input->dims->data[1];
const int num_filters = weights_feature->dims->data[0];
const int num_units = num_filters / rank;
const int memory_size = weights_time->dims->data[1];
const float* weights_feature_ptr =
tflite::micro::GetTensorData<float>(weights_feature);
const float* weights_time_ptr =
tflite::micro::GetTensorData<float>(weights_time);
const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
const float* input_ptr = tflite::micro::GetTensorData<float>(input);
float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
float* scratch_ptr = static_cast<float*>(
context->GetScratchBuffer(context, scratch_tensor_index));
float* output_ptr = tflite::micro::GetTensorData<float>(output);
// Left shift the activation_state.
{
float* new_state_start = state_ptr;
const float* old_state_start = state_ptr + 1;
const float* old_state_end =
state_ptr + batch_size * num_filters * memory_size;
while (old_state_start != old_state_end) {
*new_state_start++ = *old_state_start++;
}
}
// Note: no need to clear the latest activation, matmul is not accumulative.
// Compute conv1d(inputs, weights_feature).
// The activation_state's rightmost column is used to save current cycle
// activation. This is achieved by starting at state_ptr[memory_size - 1] and
// having the stride equal to memory_size.
// Perform batched matrix vector multiply operation:
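// For reference: with this starting point and stride, the activation for
// batch b and filter f is written to
//   state_ptr[(b * num_filters + f) * memory_size + (memory_size - 1)],
// i.e. the newest slot of that row, which the left shift above has just
// vacated.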
{
const float* matrix = weights_feature_ptr;
const float* vector = input_ptr;
float* result = &state_ptr[memory_size - 1];
float* result_in_batch = result;
for (int i = 0; i < batch_size; ++i) {
const float* matrix_ptr = matrix;
for (int j = 0; j < num_filters; ++j) {
float dot_prod = 0.0f;
const float* vector_in_batch = vector + i * input_size;
for (int k = 0; k < input_size; ++k) {
dot_prod += *matrix_ptr++ * *vector_in_batch++;
}
*result_in_batch = dot_prod;
result_in_batch += memory_size;
}
}
}
ApplyTimeWeightsBiasAndActivation(
batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr,
bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr);
}
TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
MicroContext* micro_context = GetMicroContext(context);
// Validate Tensor Inputs (dtype depends on quantization):
// [0] = Input, {2, batch_size, input_size}
// [1] = Weights Feature, {2, num_filters, input_size}
// [2] = Weights Time, {2, num_filters, memory_size}
// [3] = Bias (optional), {1, num_units}
// [4] = Activation State (variable),
// {2, batch_size, memory_size * num_filters}
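// Example with hypothetical dimensions (illustration only): for
// batch_size = 1, input_size = 10, rank = 2, num_units = 16 and
// memory_size = 8, num_filters = num_units * rank = 32, giving
//   input:            {1, 10}
//   weights_feature:  {32, 10}
//   weights_time:     {32, 8}
//   bias:             {16}
//   activation_state: {1, 8 * 32} = {1, 256}
//   output:           {1, 16}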
TfLiteTensor* input =
micro_context->AllocateTempInputTensor(node, kSvdfInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* weights_feature =
micro_context->AllocateTempInputTensor(node, kSvdfWeightsFeatureTensor);
TF_LITE_ENSURE(context, weights_feature != nullptr);
TfLiteTensor* weights_time =
micro_context->AllocateTempInputTensor(node, kSvdfWeightsTimeTensor);
TF_LITE_ENSURE(context, weights_time != nullptr);
TfLiteTensor* bias =
micro_context->AllocateTempInputTensor(node, kSvdfBiasTensor);
TfLiteTensor* activation_state = micro_context->AllocateTempInputTensor(
node, kSvdfInputActivationStateTensor);
TF_LITE_ENSURE(context, activation_state != nullptr);
// Define input constants based on input tensor definition above:
const int rank = params->rank;
const int input_size = input->dims->data[1];
const int batch_size = input->dims->data[0];
const int num_filters = weights_feature->dims->data[0];
TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
const int num_units = num_filters / rank;
const int memory_size = weights_time->dims->data[1];
// Validate Input Tensor:
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
// Validate Tensor Output:
// [0] = float/int8_t, {2, batch_size, num_units}
TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
TfLiteTensor* output =
micro_context->AllocateTempOutputTensor(node, kSvdfOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
// Validate Weights Feature Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2);
TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size);
// Validate Weights Time Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2);
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters);
TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);
// Validate Optional Bias Input Tensor:
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
}
// Validate Activation State Input Tensor:
TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2);
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
memory_size * num_filters);
// Since is_variable is not part of TfLiteEvalTensor, check is_variable here.
TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
TFLITE_DCHECK(node->user_data != nullptr);
OpDataSvdf* data = static_cast<OpDataSvdf*>(node->user_data);
if (input->type == kTfLiteInt8) {
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
TF_LITE_ENSURE(context, (weights_time->type == kTfLiteInt16) ||
(weights_time->type == kTfLiteInt8));
TF_LITE_ENSURE(context, (activation_state->type == kTfLiteInt16) ||
(activation_state->type == kTfLiteInt8));
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
}
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
const double effective_scale_1 = static_cast<double>(
input->params.scale * weights_feature->params.scale /
activation_state->params.scale);
const double effective_scale_2 =
static_cast<double>(activation_state->params.scale *
weights_time->params.scale / output->params.scale);
// TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
TF_LITE_ENSURE(
context,
std::abs(static_cast<double>(bias->params.scale) -
static_cast<double>(activation_state->params.scale *
weights_time->params.scale)) < 1e-5);
QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
&(data->effective_scale_1_b));
QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
&(data->effective_scale_2_b));
data->input_zero_point = input->params.zero_point;
data->output_zero_point = output->params.zero_point;
data->activation_state_zero_point = activation_state->params.zero_point;
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
context, batch_size * num_filters * sizeof(int32_t),
&(data->scratch_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_status);
const TfLiteStatus scratch_output_status =
context->RequestScratchBufferInArena(
context, batch_size * num_units * sizeof(int32_t),
&(data->scratch_output_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_output_status);
} else {
TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
if (bias != nullptr) {
TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
}
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
context, batch_size * num_filters * sizeof(float),
&(data->scratch_tensor_index));
TF_LITE_ENSURE_OK(context, scratch_status);
}
micro_context->DeallocateTempTfLiteTensor(input);
micro_context->DeallocateTempTfLiteTensor(weights_feature);
micro_context->DeallocateTempTfLiteTensor(weights_time);
micro_context->DeallocateTempTfLiteTensor(activation_state);
micro_context->DeallocateTempTfLiteTensor(output);
micro_context->DeallocateTempTfLiteTensor(bias);
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,93 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_resource_variable.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
namespace {
struct OpData {
int32_t resource_id;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const auto* params =
reinterpret_cast<const TfLiteVarHandleParams*>(node->builtin_data);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph& graph_info = micro_context->graph();
MicroResourceVariables* resources = graph_info.GetResourceVariables();
if (resources == nullptr) {
MicroPrintf(
"VAR_HANDLE requires resource variables. Please create "
"ResourceVariables and pass it to the interpreter.");
return kTfLiteError;
}
op_data->resource_id =
resources->CreateIdIfNoneFound(params->container, params->shared_name);
if (op_data->resource_id < 0) {
return kTfLiteError;
}
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TFLITE_DCHECK(output != nullptr);
// Assign saved resource_id so this output tensor will always return the
// correct resource id.
output->data.i32 = &op_data->resource_id;
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TFLITE_DCHECK(output != nullptr);
// Assign saved resource_id so this output tensor will always return the
// correct resource id.
output->data.i32 = &op_data->resource_id;
return kTfLiteOk;
}
} // namespace.
TfLiteRegistration Register_VAR_HANDLE() {
return tflite::micro::RegisterOp(Init, Prepare, Eval);
}
} // namespace tflite

View File

@@ -1,133 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include <cstring>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
struct OpData {
int cond_subgraph_index;
int body_subgraph_index;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const auto* params =
reinterpret_cast<const TfLiteWhileParams*>(node->builtin_data);
op_data->cond_subgraph_index = params->cond_subgraph_index;
op_data->body_subgraph_index = params->body_subgraph_index;
// The first input is the condition.
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
size_t num_inputs = node->inputs->size;
size_t num_outputs = node->outputs->size;
MicroGraph& graph_info = micro_context->graph();
TF_LITE_ENSURE(context,
op_data->cond_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE(context,
op_data->body_subgraph_index < graph_info.NumSubgraphs());
TF_LITE_ENSURE_EQ(context, num_inputs,
graph_info.NumSubgraphInputs(op_data->cond_subgraph_index));
TF_LITE_ENSURE_EQ(context, num_inputs,
graph_info.NumSubgraphInputs(op_data->body_subgraph_index));
TF_LITE_ENSURE_EQ(context, num_inputs, num_outputs);
TF_LITE_ENSURE_EQ(
context, num_outputs,
graph_info.NumSubgraphOutputs(op_data->body_subgraph_index));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
MicroGraph* graph_info = &micro_context->graph();
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToSubgraphInputs(
context, node, graph_info, op_data->cond_subgraph_index,
/*first_tensor_idx=*/0));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(op_data->cond_subgraph_index));
TfLiteEvalTensor* cond_subgraph_output = graph_info->GetSubgraphOutput(
op_data->cond_subgraph_index, /*tensor_idx=*/0);
bool cond_value = cond_subgraph_output->data.b[0];
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToSubgraphInputs(
context, node, graph_info, op_data->body_subgraph_index,
/*first_tensor_idx=*/0));
TF_LITE_ENSURE_OK(context,
tflite::micro::CopyOpInputsToOpOutputs(context, node));
while (cond_value == true) {
// Copy output of this iteration back to the body input.
TF_LITE_ENSURE_OK(
context, tflite::micro::CopyOpOutputsToSubgraphInputs(
context, node, graph_info, op_data->body_subgraph_index));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(op_data->body_subgraph_index));
TF_LITE_ENSURE_OK(
context, tflite::micro::CopySubgraphOutputsToOpOutputs(
context, node, graph_info, op_data->body_subgraph_index));
TF_LITE_ENSURE_OK(
context, tflite::micro::CopyOpOutputsToSubgraphInputs(
context, node, graph_info, op_data->cond_subgraph_index));
TF_LITE_ENSURE_OK(context,
graph_info->InvokeSubgraph(op_data->cond_subgraph_index));
cond_subgraph_output = graph_info->GetSubgraphOutput(
op_data->cond_subgraph_index, /*tensor_idx=*/0);
cond_value = cond_subgraph_output->data.b[0];
}
return kTfLiteOk;
}
} // namespace.
TfLiteRegistration Register_WHILE() {
return tflite::micro::RegisterOp(Init, Prepare, Eval);
}
} // namespace tflite

View File

@@ -1,170 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/memory_helpers.h"
#include <cstddef>
#include <cstdint>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment) {
std::uintptr_t data_as_uintptr_t = reinterpret_cast<std::uintptr_t>(data);
uint8_t* aligned_result = reinterpret_cast<uint8_t*>(
((data_as_uintptr_t + (alignment - 1)) / alignment) * alignment);
return aligned_result;
}
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment) {
std::uintptr_t data_as_uintptr_t = reinterpret_cast<std::uintptr_t>(data);
uint8_t* aligned_result =
reinterpret_cast<uint8_t*>((data_as_uintptr_t / alignment) * alignment);
return aligned_result;
}
size_t AlignSizeUp(size_t size, size_t alignment) {
size_t aligned_size = (((size + (alignment - 1)) / alignment) * alignment);
return aligned_size;
}
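// Examples (illustration only): AlignSizeUp(10, 8) == 16 and
// AlignSizeUp(16, 8) == 16; for a pointer whose address is 0x1003,
// AlignPointerUp(p, 4) yields 0x1004 while AlignPointerDown(p, 4) yields
// 0x1000.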
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
switch (type) {
case kTfLiteFloat16:
*size = sizeof(int16_t);
break;
case kTfLiteFloat32:
*size = sizeof(float);
break;
case kTfLiteFloat64:
*size = sizeof(double);
break;
case kTfLiteInt16:
*size = sizeof(int16_t);
break;
case kTfLiteInt32:
*size = sizeof(int32_t);
break;
case kTfLiteUInt32:
*size = sizeof(uint32_t);
break;
case kTfLiteUInt8:
*size = sizeof(uint8_t);
break;
case kTfLiteInt8:
*size = sizeof(int8_t);
break;
case kTfLiteInt64:
*size = sizeof(int64_t);
break;
case kTfLiteUInt64:
*size = sizeof(uint64_t);
break;
case kTfLiteBool:
*size = sizeof(bool);
break;
case kTfLiteResource:
*size = sizeof(int32_t);
break;
case kTfLiteComplex64:
*size = sizeof(float) * 2;
break;
case kTfLiteComplex128:
*size = sizeof(double) * 2;
break;
default:
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter) {
int element_count = 1;
// If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar
// so has 1 element.
if (flatbuffer_tensor.shape() != nullptr) {
for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
element_count *= flatbuffer_tensor.shape()->Get(n);
}
}
TfLiteType tf_lite_type;
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&tf_lite_type, error_reporter));
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size));
*bytes = element_count * (*type_size);
return kTfLiteOk;
}
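// Example (illustration only): for a float32 tensor of shape {2, 3} the
// function above reports *type_size = 4 and *bytes = 2 * 3 * 4 = 24; a
// scalar tensor (null shape vector) reports *bytes = 4.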
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes) {
TFLITE_DCHECK(out_bytes != nullptr);
int element_count = 1;
// If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element.
if (eval_tensor->dims != nullptr) {
for (int n = 0; n < eval_tensor->dims->size; ++n) {
element_count *= eval_tensor->dims->data[n];
}
}
size_t type_size;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size));
*out_bytes = element_count * type_size;
return kTfLiteOk;
}
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output) {
const TfLiteTensor* input = nullptr;
TF_LITE_ENSURE(context, input1->dims != nullptr);
TF_LITE_ENSURE(context, input2->dims != nullptr);
TF_LITE_ENSURE(context, output->dims->size == 0);
input = input1->dims->size > input2->dims->size ? input1 : input2;
TF_LITE_ENSURE(context, output->type == input->type);
size_t size = 0;
TfLiteTypeSizeOf(input->type, &size);
const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount();
for (int i = 0; i < dimensions_count; i++) {
size *= input->dims->data[i];
}
output->bytes = size;
output->dims =
reinterpret_cast<TfLiteIntArray*>(context->AllocatePersistentBuffer(
context, TfLiteIntArrayGetSizeInBytes(size)));
output->dims->size = input->dims->size;
for (int i = 0; i < dimensions_count; i++) {
output->dims->data[i] = input->dims->data[i];
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -1,59 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#include <cstddef>
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Returns the next pointer address aligned to the given alignment.
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment);
// Returns the previous pointer address aligned to the given alignment.
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
// Returns an increased size that's a multiple of alignment.
size_t AlignSizeUp(size_t size, size_t alignment);
// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);
// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter);
// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes);
// Deduce output dimensions from input and allocate given size.
// Useful for operators with two inputs where the largest input should equal the
// output dimension.
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteTensor* output);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

View File

@@ -1,452 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_string.h"
namespace tflite {
namespace {
// Returns a character representing a numbered buffer
// for GreedyMemoryPlanner::PrintMemoryPlan()
char GetOrdinalCharacter(int i) {
if (i < 10) {
return '0' + i;
} else if (i < 36) {
return 'a' + (i - 10);
} else if (i < 62) {
return 'A' + (i - 36);
}
return '*';
}
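// Examples: GetOrdinalCharacter(7) == '7', GetOrdinalCharacter(12) == 'c',
// GetOrdinalCharacter(40) == 'E', and any index >= 62 maps to '*'.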
} // namespace
// Simple stable in-place sort function. Not time-efficient for large arrays.
// Would normally be in an anonymous namespace to keep it private, but we want
// to be able to test it externally.
void ReverseSortInPlace(int* values, int* ids, int size) {
bool any_swapped;
do {
any_swapped = false;
for (int i = 1; i < size; ++i) {
if (values[i - 1] < values[i]) {
const int value_temp = values[i - 1];
values[i - 1] = values[i];
values[i] = value_temp;
const int id_temp = ids[i - 1];
ids[i - 1] = ids[i];
ids[i] = id_temp;
any_swapped = true;
}
}
} while (any_swapped);
}
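// Usage sketch (values invented for illustration): the ids array is permuted
// in step with the values array, which is sorted into descending order.
//
//   int values[] = {16, 64, 32};
//   int ids[] = {0, 1, 2};
//   ReverseSortInPlace(values, ids, 3);
//   // values is now {64, 32, 16} and ids is now {1, 2, 0}.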
GreedyMemoryPlanner::GreedyMemoryPlanner() {}
TfLiteStatus GreedyMemoryPlanner::Init(unsigned char* scratch_buffer,
int scratch_buffer_size) {
// Reset internal states
buffer_count_ = 0;
need_to_calculate_offsets_ = true;
// Allocate the arrays we need within the scratch buffer arena.
max_buffer_count_ = scratch_buffer_size / per_buffer_size();
unsigned char* next_free = scratch_buffer;
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
next_free += sizeof(BufferRequirements) * max_buffer_count_;
buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
next_free += sizeof(ListEntry) * max_buffer_count_;
buffer_offsets_ = reinterpret_cast<int*>(next_free);
return kTfLiteOk;
}
GreedyMemoryPlanner::~GreedyMemoryPlanner() {
// We don't own the scratch buffer, so don't deallocate anything.
}
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used) {
if (buffer_count_ >= max_buffer_count_) {
TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
max_buffer_count_);
return kTfLiteError;
}
BufferRequirements* current = &requirements_[buffer_count_];
current->size = size;
current->first_time_used = first_time_used;
current->last_time_used = last_time_used;
current->offline_offset = kOnlinePlannedBuffer;
++buffer_count_;
need_to_calculate_offsets_ = true;
return kTfLiteOk;
}
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
int last_time_used, int offline_offset) {
BufferRequirements* current = &requirements_[buffer_count_];
if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
kTfLiteOk) {
return kTfLiteError;
}
current->offline_offset = offline_offset;
return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
const int last_time_used) const {
const BufferRequirements* entry_requirements =
&requirements_[entry->requirements_index];
if (entry_requirements->first_time_used > last_time_used) {
return false;
}
if (first_time_used > entry_requirements->last_time_used) {
return false;
}
return true;
}
GreedyMemoryPlanner::ListEntry*
GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
const int last_time_used) {
ListEntry* result = nullptr;
ListEntry* candidate_next_entry;
if (start == nullptr) {
candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
} else {
if (start->next_entry_index == -1) {
return nullptr;
}
candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
}
do {
if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
last_time_used)) {
result = candidate_next_entry;
break;
}
if (candidate_next_entry->next_entry_index == -1) {
break;
}
candidate_next_entry =
&buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
} while (true);
return result;
}
void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
return;
}
need_to_calculate_offsets_ = false;
// Start off by ordering the buffers in descending order of size.
// This helps find a more compact layout. Intuitively, you can think
// about putting the large buffers in place first, and then the
// smaller buffers can fit in the gaps, rather than fragmenting the
// gaps with small buffers at the beginning. Add offline planned offsets
// first in the list, since they have a predetermined offset.
int idx_from_tail = buffer_count_;
int idx_from_head = 0;
for (int i = 0; i < buffer_count_; ++i) {
if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
idx_from_tail--;
buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
buffer_ids_sorted_[idx_from_tail] = i;
buffer_offsets_[i] = -1;
} else {
buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
buffer_ids_sorted_[idx_from_head] = i;
buffer_offsets_[i] = requirements_[i].offline_offset;
idx_from_head++;
}
}
// This sorting algorithm is naive, and may end up taking a very long time
// with hundreds of buffers. Do not sort the offline planned offsets.
ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
&buffer_ids_sorted_[idx_from_head],
buffer_count_ - idx_from_head);
// Initialize the first entry to the first buffer in
// buffer_ids_sorted_.
// - If there are no offline planned offsets, the largest buffer will be
// first, and the buffers will be handled in size order.
// - If offline offsets are present, these will be handled first in order
// for the greedy algorithm to utilize gaps in the offline plan.
first_entry_index_ = 0;
next_free_entry_ = 1;
ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
first_entry->next_entry_index = -1; // to mark the entry as end of list
int buffer_id = buffer_ids_sorted_[0];
first_entry->requirements_index = buffer_id;
if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
buffer_offsets_[buffer_id] = 0;
}
first_entry->offset = buffer_offsets_[buffer_id];
// Work through the rest of the buffers to find a good gap to place each one.
for (int i = 1; i < buffer_count_; ++i) {
// The id is the order in which the buffer was originally added by the client.
buffer_id = buffer_ids_sorted_[i];
// Look at what size and time range the buffer needs to be active.
BufferRequirements* wanted_requirements = &requirements_[buffer_id];
const int wanted_size = wanted_requirements->size;
const int wanted_first_time_used = wanted_requirements->first_time_used;
const int wanted_last_time_used = wanted_requirements->last_time_used;
// Find the first buffer that's active in our time range. All placed
// buffers are stored in the order of their starting position in the arena
// so that it's easy to find the next buffer in memory, and hence the gap
// between them.
// The candidate_entry variable holds the buffer that we're considering
// placing the current buffer after.
int candidate_offset = 0;
// Loop through the offset-ordered list of buffers, looking for gaps.
if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
ListEntry* prior_entry = nullptr;
while (true) {
// Find out what the next active buffer is.
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
prior_entry, wanted_first_time_used, wanted_last_time_used);
if (prior_entry) {
BufferRequirements* candidate_requirements =
&requirements_[prior_entry->requirements_index];
const int prior_entry_offset =
prior_entry->offset + candidate_requirements->size;
if (prior_entry_offset > candidate_offset) {
candidate_offset = prior_entry_offset;
}
}
if (next_entry == nullptr) {
// We're at the end of the list, so we can always append the buffer
// here.
break;
}
// Find out how much space there is between us and the next buffer.
const int gap = next_entry->offset - candidate_offset;
if (gap >= wanted_size) {
// This entry has a big enough gap between it and the next, so
// use it!
break;
}
// The gap wasn't big enough, so move on to another candidate.
prior_entry = next_entry;
}
} else {
// Offline planned offsets are to be considered constant
candidate_offset = wanted_requirements->offline_offset;
}
// At this point, we've either found a gap (possibly at the end of the
// list) and want to place the buffer there, or there are no other active
// buffers in this time range and so we can put it at offset zero.
// Record the buffer's offset in our plan.
buffer_offsets_[buffer_id] = candidate_offset;
// Add the newly-placed buffer to our offset-ordered list, so that
// subsequent passes can fit in their buffers around it.
ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
new_entry->offset = candidate_offset;
new_entry->requirements_index = buffer_id;
const int new_entry_index = next_free_entry_;
++next_free_entry_;
if (first_entry->offset > candidate_offset) {
// The new entry offset is smaller than the first entry offset =>
// replace the first entry
first_entry = new_entry;
first_entry->next_entry_index = first_entry_index_;
first_entry_index_ = new_entry_index;
} else {
ListEntry* current_entry = first_entry;
// Make sure that we insert the buffer at the correct place in the
// buffer-offset-ordered list
while (true) {
const int next_entry_index = current_entry->next_entry_index;
if (next_entry_index == -1) {
// We're at the end of the list, so just add the new entry here.
current_entry->next_entry_index = new_entry_index;
new_entry->next_entry_index = -1;
break;
}
// Not at the end of the list, so take a look at the next entry.
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
if (next_entry->offset > candidate_offset) {
// We're at the right spot to do an insertion and retain the sorting
// order, so place the new entry here.
new_entry->next_entry_index = current_entry->next_entry_index;
current_entry->next_entry_index = new_entry_index;
break;
}
current_entry = next_entry;
}
}
}
}
size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
CalculateOffsetsIfNeeded();
if (buffer_count_ == 0) {
return 0;
}
ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
size_t max_size = 0;
while (entry) {
BufferRequirements* requirements =
&requirements_[entry->requirements_index];
const size_t current_size = entry->offset + requirements->size;
if (current_size > max_size) {
max_size = current_size;
}
if (entry->next_entry_index == -1) {
break;
}
entry = &buffers_sorted_by_offset_[entry->next_entry_index];
}
return max_size;
}
void GreedyMemoryPlanner::PrintMemoryPlan() {
CalculateOffsetsIfNeeded();
for (int i = 0; i < buffer_count_; ++i) {
MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
GetOrdinalCharacter(i), i, requirements_[i].size,
buffer_offsets_[i], requirements_[i].first_time_used,
requirements_[i].last_time_used);
}
constexpr int kLineWidth = 80;
int max_size = kLineWidth;
int max_time = 0;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* requirements = &requirements_[i];
const int offset = buffer_offsets_[i];
const int last_time_used = requirements->last_time_used;
const int size = offset + requirements->size;
if (size > max_size) {
max_size = size;
}
if (last_time_used > max_time) {
max_time = last_time_used;
}
}
char line[kLineWidth + 1];
for (int t = 0; t <= max_time; ++t) {
for (int c = 0; c < kLineWidth; ++c) {
line[c] = '.';
}
int memory_use = 0;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* requirements = &requirements_[i];
if ((t < requirements->first_time_used) ||
(t > requirements->last_time_used)) {
continue;
}
const int offset = buffer_offsets_[i];
if (offset == -1) {
continue;
}
const int size = requirements->size;
memory_use += size;
const int line_start = (offset * kLineWidth) / max_size;
const int line_end = ((offset + size) * kLineWidth) / max_size;
for (int n = line_start; n < line_end; ++n) {
if (line[n] == '.') {
line[n] = GetOrdinalCharacter(i);
} else {
line[n] = '!';
}
}
}
line[kLineWidth] = 0;
MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line,
(memory_use + 1023) / 1024);
}
}
int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
CalculateOffsetsIfNeeded();
if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
TF_LITE_REPORT_ERROR(error_reporter,
"buffer index %d is outside range 0 to %d",
buffer_index, buffer_count_);
return kTfLiteError;
}
*offset = buffer_offsets_[buffer_index];
return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
CalculateOffsetsIfNeeded();
bool were_overlaps_found = false;
for (int i = 0; i < buffer_count_; ++i) {
BufferRequirements* a_requirements = &requirements_[i];
const int a_start_offset = buffer_offsets_[i];
const int a_first_time_used = a_requirements->first_time_used;
const int a_last_time_used = a_requirements->last_time_used;
const int a_end_offset = a_start_offset + a_requirements->size;
for (int j = 0; j < buffer_count_; ++j) {
if (i == j) {
continue;
}
BufferRequirements* b_requirements = &requirements_[j];
const int b_start_offset = buffer_offsets_[j];
const int b_first_time_used = b_requirements->first_time_used;
const int b_last_time_used = b_requirements->last_time_used;
const int b_end_offset = b_start_offset + b_requirements->size;
if ((a_first_time_used > b_last_time_used) ||
(b_first_time_used > a_last_time_used)) {
// Buffers don't overlap in time.
continue;
}
if ((a_start_offset >= b_end_offset) ||
(b_start_offset >= a_end_offset)) {
// No overlap in memory.
continue;
}
were_overlaps_found = true;
TF_LITE_REPORT_ERROR(
error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
}
}
return were_overlaps_found;
}
} // namespace tflite

View File

@@ -1,167 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
namespace tflite {
constexpr int kOnlinePlannedBuffer = -1;
// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
// The algorithm works like this:
// - The client enters the buffer information through AddBuffer().
// - When a function like GetOffsetForBuffer() is called, the
// CalculateOffsetsIfNeeded() method is invoked.
// - If an up to date plan is not already present, one will be calculated.
// - The buffers are sorted in descending order of size.
// - The largest buffer is placed at offset zero.
// - The rest of the buffers are looped through in descending size order.
// - The other buffers that need to be in memory at the same time are found.
// - The first gap between simultaneously active buffers that the current
// buffer fits into will be used.
// - If no large-enough gap is found, the current buffer is placed after the
// last buffer that's simultaneously active.
// - This continues until all buffers are placed, and the offsets stored.
//
// This is not guaranteed to produce the best placement, since that's an
// NP-Complete problem, but in practice it should produce one that's decent.
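// Typical client flow (sketch only; the buffer sizes, lifetimes and the
// error_reporter instance below are invented for illustration):
//
//   unsigned char planner_arena[1024];
//   GreedyMemoryPlanner planner;
//   planner.Init(planner_arena, sizeof(planner_arena));
//   planner.AddBuffer(error_reporter, /*size=*/256, /*first=*/0, /*last=*/1);
//   planner.AddBuffer(error_reporter, /*size=*/128, /*first=*/1, /*last=*/2);
//   planner.AddBuffer(error_reporter, /*size=*/512, /*first=*/1, /*last=*/3);
//   size_t arena_needed = planner.GetMaximumMemorySize();
//   int offset = 0;
//   planner.GetOffsetForBuffer(error_reporter, /*buffer_index=*/0, &offset);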
class GreedyMemoryPlanner : public MicroMemoryPlanner {
public:
GreedyMemoryPlanner();
~GreedyMemoryPlanner() override;
// You need to pass in an area of memory to be used for planning. The client
// should ensure the validity of the memory when it needs to use this object.
// This memory isn't owned by this object, so management should be handled by
// the client. This is so it can be stack or globally allocated if necessary
// on devices without dynamic memory allocation. How many buffers can be
// planned for will depend on the size of this scratch memory, so you should
// enlarge it if you see an error when calling AddBuffer(). The memory can be
// reused once you're done with the planner, as long as you copy the
// calculated offsets to another location. Each buffer requires about 36 bytes
// of scratch.
TfLiteStatus Init(unsigned char* scratch_buffer,
int scratch_buffer_size) override;
// Record details of a buffer we want to place.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
// Record details of an offline planned buffer offset we want to place.
// offline_offset is the buffer offset from the start of the arena.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used,
int offline_offset) override;
// Returns the high-water mark of used memory. This is the minimum size of a
// memory arena you'd need to allocate to hold these buffers.
size_t GetMaximumMemorySize() override;
// How many buffers have been recorded.
int GetBufferCount() override;
// Where a given buffer should be placed in the memory arena.
// This information is stored in the memory arena itself, so once the arena
// is used for inference, it will be overwritten.
TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter,
int buffer_index, int* offset) override;
// Prints an ascii-art diagram of the buffer layout plan.
void PrintMemoryPlan() override;
// Debug method to check whether any buffer allocations are overlapping. This
// is an O(N^2) complexity operation, so only use for testing.
bool DoAnyBuffersOverlap(ErrorReporter* error_reporter);
// Used to store a list of buffers ordered by their offset.
struct ListEntry {
int offset;
int requirements_index;
int next_entry_index;
};
// Number of bytes required in order to plan a buffer.
static size_t per_buffer_size() {
const int per_buffer_size =
sizeof(BufferRequirements) + // requirements_
sizeof(int) + // buffer_sizes_sorted_
sizeof(int) + // buffer_ids_sorted_
sizeof(ListEntry) + // buffers_sorted_by_offset_
sizeof(int); // buffer_offsets_;
return per_buffer_size;
}
private:
// Whether a buffer is active in a given time range.
bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
const int last_time_used) const;
// Walks the list to return the next buffer that is active in a given time
// range, or a null pointer if there are none.
ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start,
const int first_time_used,
const int last_time_used);
// If there isn't an up to date plan, calculate a new one.
void CalculateOffsetsIfNeeded();
// How many buffers we can plan for, based on the arena size we're given in
// the constructor.
int max_buffer_count_;
// The number of buffers added so far.
int buffer_count_;
// Records the client-provided information about each buffer.
struct BufferRequirements {
int size;
int offline_offset;
int first_time_used;
int last_time_used;
};
// Working arrays used during the layout algorithm.
BufferRequirements* requirements_;
// buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
// {
// offline planned buffers,
// online planned buffers sorted by size
// }
int* buffer_sizes_sorted_;
int* buffer_ids_sorted_;
ListEntry* buffers_sorted_by_offset_;
int next_free_entry_; // Index of the next free entry of
// buffers_sorted_by_offset_
int first_entry_index_; // Index of the first entry (smallest offset) of
// buffers_sorted_by_offset_
// Stores the outcome of the plan, the location of each buffer in the arena.
int* buffer_offsets_;
// Whether buffers have been added since the last plan was calculated.
bool need_to_calculate_offsets_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_

Some files were not shown because too many files have changed in this diff