Mirror of https://github.com/jomjol/AI-on-the-edge-device.git (synced 2025-12-11 05:56:57 +03:00)

Commit: removed tflite-lib

tensorflow/lite/micro/all_ops_resolver.h
@@ -1,38 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_
#define TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_

#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

namespace tflite {

// The magic number in the template parameter is the maximum number of ops that
// can be added to AllOpsResolver. It can be increased if needed. Most
// applications that care about the memory footprint will want to use
// MicroMutableOpResolver directly, with an application-specific template
// parameter. The examples directory has sample code for this.
class AllOpsResolver : public MicroMutableOpResolver<128> {
 public:
  AllOpsResolver();

 private:
  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_ALL_OPS_RESOLVER_H_

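As the header's comment suggests, footprint-sensitive applications register only the ops they need. A minimal sketch of that pattern, assuming a hypothetical model that uses just two ops (AddConv2D()/AddFullyConnected() are registration helpers provided by MicroMutableOpResolver):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Resolver sized for exactly the two ops this hypothetical model uses,
// instead of AllOpsResolver's fixed capacity of 128.
static tflite::MicroMutableOpResolver<2> op_resolver;

void RegisterModelOps() {
  op_resolver.AddConv2D();
  op_resolver.AddFullyConnected();
}
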
tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h
@@ -1,100 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/c_api_types.h"

namespace tflite {
// Interface classes that the TFLM framework relies on to get the buffers it
// needs. There are two types of buffers that the TFLM framework requires:
// persistent and non-persistent. Persistent buffers, once allocated, are
// never freed by the TFLM framework. Non-persistent buffers can be allocated
// and deallocated by the TFLM framework. This file defines the two interface
// classes that the TFLM framework relies on to manage these buffers.

// Interface class for managing persistent buffers.
class IPersistentBufferAllocator {
 public:
  IPersistentBufferAllocator() {}
  virtual ~IPersistentBufferAllocator() {}

  // Allocates persistent memory. The persistent buffer is never freed.
  virtual uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) = 0;

  // Returns the size of all persistent allocations in bytes.
  virtual size_t GetPersistentUsedBytes() const = 0;
};

// Interface class for managing non-persistent buffers.
// The default non-persistent buffers are temp buffers that are not resizable.
// Support of at least one resizable buffer is required.
class INonPersistentBufferAllocator {
 public:
  INonPersistentBufferAllocator() {}
  virtual ~INonPersistentBufferAllocator() {}

  // Allocates a temporary buffer. This buffer is not resizable.
  virtual uint8_t* AllocateTemp(size_t size, size_t alignment) = 0;

  // Signals that a temporary buffer is no longer needed.
  virtual void DeallocateTemp(uint8_t* buf) = 0;

  // Returns true if all temporary buffers are already deallocated.
  virtual bool IsAllTempDeallocated() = 0;

  // Signals that all temporary allocations can be reclaimed. TFLM calls this
  // API when it knows that all temporary buffers it requested have been
  // deallocated. The goal of this API is to make it possible for
  // implementations of INonPersistentBufferAllocator to reuse the buffer with
  // reasonable complexity.
  virtual TfLiteStatus ResetTempAllocations() = 0;

  // Returns a buffer that is resizable via ResizeBuffer().
  virtual uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) = 0;

  // Resizes a buffer previously returned by AllocateResizableBuffer().
  virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
                                    size_t alignment) = 0;

  // Frees up the memory occupied by the resizable buffer.
  virtual TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) = 0;

  // Returns a pointer to the start of the overlay memory, which is used for
  // activation tensors and scratch buffers by kernels at the Invoke stage.
  virtual uint8_t* GetOverlayMemoryAddress() const = 0;

  // Reserves the size of the overlay memory. This overlay is reserved for the
  // kernels at the Invoke stage. It is referred to as the overlay because,
  // before the Invoke stage, the same memory can be used for temp buffers.
  // The layout of the memory is planned by the memory planner separately at
  // the Invoke stage.
  virtual TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
                                                         size_t alignment) = 0;

  // Returns the size of the non-persistent buffer in use.
  virtual size_t GetNonPersistentUsedBytes() const = 0;

  // Returns the number of bytes available with a given alignment. This number
  // takes into account any temporary allocations.
  virtual size_t GetAvailableMemory(size_t alignment) const = 0;
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_

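These interfaces imply a strict temp-buffer protocol: every AllocateTemp() must be paired with a DeallocateTemp() before ResetTempAllocations() may reclaim the region. A hedged caller-side sketch (UseScratch and the sizes are illustrative, not TFLM code):

#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"

// Illustrative call pattern against the interface; `allocator` is any
// concrete INonPersistentBufferAllocator implementation.
TfLiteStatus UseScratch(tflite::INonPersistentBufferAllocator* allocator) {
  uint8_t* scratch = allocator->AllocateTemp(/*size=*/64, /*alignment=*/16);
  if (scratch == nullptr) {
    return kTfLiteError;
  }
  // ... use scratch here ...
  allocator->DeallocateTemp(scratch);
  // Only legal once every AllocateTemp() has a matching DeallocateTemp().
  return allocator->ResetTempAllocations();
}
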
tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc
@@ -1,170 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h"

#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {

NonPersistentArenaBufferAllocator::NonPersistentArenaBufferAllocator(
    uint8_t* buffer, size_t buffer_size)
    : buffer_head_(buffer),
      buffer_tail_(buffer + buffer_size),
      head_temp_(buffer),
      next_temp_(buffer) {}

NonPersistentArenaBufferAllocator::~NonPersistentArenaBufferAllocator() {}

// Allocates a temporary buffer. This buffer is not resizable.
uint8_t* NonPersistentArenaBufferAllocator::AllocateTemp(size_t size,
                                                         size_t alignment) {
  uint8_t* const aligned_result = AlignPointerUp(next_temp_, alignment);
  const size_t available_memory = buffer_tail_ - aligned_result;
  if (available_memory < size) {
    MicroPrintf(
        "Failed to allocate temp memory. Requested: %u, "
        "available %u, missing: %u",
        size, available_memory, size - available_memory);
    return nullptr;
  }
  next_temp_ = aligned_result + size;
  temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(aligned_result);
  temp_buffer_count_++;
  return aligned_result;
}

// Signals that a temporary buffer is no longer needed.
void NonPersistentArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) {
  temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(temp_buf);
  temp_buffer_count_--;
}

// Returns true if all temporary buffers are already deallocated.
bool NonPersistentArenaBufferAllocator::IsAllTempDeallocated() {
  if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) {
    MicroPrintf(
        "Number of allocated temp buffers: %d. Checksum passing status: %d",
        temp_buffer_count_, !temp_buffer_ptr_check_sum_);
    return false;
  }
  return true;
}

// Signals that all temporary allocations can be reclaimed. TFLM calls this
// API when it knows that all temporary buffers it requested have been
// deallocated. The goal of this API is to make it possible for
// implementations of INonPersistentBufferAllocator to reuse the buffer with
// reasonable complexity.
TfLiteStatus NonPersistentArenaBufferAllocator::ResetTempAllocations() {
  if (!IsAllTempDeallocated()) {
    MicroPrintf(
        "All temp buffers must be freed before calling ResetTempAllocations()");
    return kTfLiteError;
  }
  next_temp_ = head_temp_;
  return kTfLiteOk;
}

// Returns a buffer that is resizable via ResizeBuffer().
uint8_t* NonPersistentArenaBufferAllocator::AllocateResizableBuffer(
    size_t size, size_t alignment) {
  // Only supports one resizable buffer, which starts at the buffer head.
  uint8_t* expected_resizable_buf = AlignPointerUp(buffer_head_, alignment);

  if (resizable_buffer_allocated_) {
    MicroPrintf(
        "Cannot allocate a new resizable buffer when one is already allocated");
    return nullptr;
  }

  if (ResizeBuffer(expected_resizable_buf, size, alignment) == kTfLiteOk) {
    resizable_buffer_allocated_ = true;
    return expected_resizable_buf;
  }
  return nullptr;
}

// Resizes a buffer previously returned by AllocateResizableBuffer().
// Note that ResizeBuffer(old_resizable_buf, 0, 1) effectively deallocates
// a previously allocated resizable buffer.
TfLiteStatus NonPersistentArenaBufferAllocator::ResizeBuffer(
    uint8_t* resizable_buf, size_t size, size_t alignment) {
  // Only supports one resizable buffer, which starts at the buffer head.
  uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
  if (resizable_buf != expect_resizable_buf) {
    MicroPrintf("Internal error: buffer is not resizable");
    return kTfLiteError;
  }
  if (head_temp_ != next_temp_) {
    MicroPrintf("ResetTempAllocations() is not called before ResizeBuffer().");
    return kTfLiteError;
  }

  const size_t available_memory = buffer_tail_ - expect_resizable_buf;
  if (available_memory < size) {
    MicroPrintf(
        "Failed to resize buffer. Requested: %u, available %u, missing: %u",
        size, available_memory, size - available_memory);
    return kTfLiteError;
  }
  head_temp_ = expect_resizable_buf + size;
  next_temp_ = head_temp_;

  return kTfLiteOk;
}

// Frees up the memory occupied by the resizable buffer.
TfLiteStatus NonPersistentArenaBufferAllocator::DeallocateResizableBuffer(
    uint8_t* resizable_buf) {
  TfLiteStatus status = ResizeBuffer(resizable_buf, 0, 1);
  if (status == kTfLiteOk) {
    resizable_buffer_allocated_ = false;
  }
  return status;
}

// Returns a pointer to the start of the overlay memory, which is used for
// activation tensors and scratch buffers by kernels at the Invoke stage.
uint8_t* NonPersistentArenaBufferAllocator::GetOverlayMemoryAddress() const {
  return buffer_head_;
}

// Reserves the size of the overlay memory. This overlay is reserved for the
// kernels at the Invoke stage. It is referred to as the overlay because,
// before the Invoke stage, the same memory can be used for temp buffers. The
// layout of the memory is planned by the memory planner separately at the
// Invoke stage.
TfLiteStatus
NonPersistentArenaBufferAllocator::ReserveNonPersistentOverlayMemory(
    size_t size, size_t alignment) {
  uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
  return ResizeBuffer(expect_resizable_buf, size, alignment);
}

// Returns the size of the non-persistent buffer in use.
size_t NonPersistentArenaBufferAllocator::GetNonPersistentUsedBytes() const {
  return (next_temp_ - buffer_head_);
}

// Returns the number of bytes available with a given alignment. This number
// takes into account any temporary allocations.
size_t NonPersistentArenaBufferAllocator::GetAvailableMemory(
    size_t alignment) const {
  uint8_t* const aligned_temp = AlignPointerUp(next_temp_, alignment);
  uint8_t* const aligned_tail = AlignPointerDown(buffer_tail_, alignment);
  return aligned_tail - aligned_temp;
}

}  // namespace tflite

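The XOR bookkeeping above is worth spelling out: XOR-ing each pointer in on allocation and out on deallocation leaves zero exactly when every allocation was matched by one deallocation. A standalone, runnable illustration (not TFLM code):

#include <cstdint>
#include <cstdio>

int main() {
  intptr_t check_sum = 0;  // mirrors temp_buffer_ptr_check_sum_
  int count = 0;           // mirrors temp_buffer_count_
  uint8_t a = 0, b = 0;

  check_sum ^= reinterpret_cast<intptr_t>(&a);  count++;  // "allocate" a
  check_sum ^= reinterpret_cast<intptr_t>(&b);  count++;  // "allocate" b
  check_sum ^= reinterpret_cast<intptr_t>(&b);  count--;  // "deallocate" b
  check_sum ^= reinterpret_cast<intptr_t>(&a);  count--;  // "deallocate" a

  // Both are zero again, so IsAllTempDeallocated() would return true.
  printf("all deallocated: %s\n",
         (count == 0 && check_sum == 0) ? "true" : "false");
  return 0;
}
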
tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h
@@ -1,105 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"

namespace tflite {

// Implements INonPersistentBufferAllocator on an arena that is dedicated to
// non-persistent buffers.
class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
 public:
  NonPersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
  virtual ~NonPersistentArenaBufferAllocator();

  // Allocates a temporary buffer. This buffer is not resizable.
  uint8_t* AllocateTemp(size_t size, size_t alignment) override;

  // Signals that a temporary buffer is no longer needed.
  void DeallocateTemp(uint8_t* buf) override;

  // Returns true if all temporary buffers are already deallocated.
  bool IsAllTempDeallocated() override;

  // Signals that all temporary allocations can be reclaimed. TFLM calls this
  // API when it knows that all temporary buffers it requested have been
  // deallocated.
  TfLiteStatus ResetTempAllocations() override;

  // Returns a buffer that is resizable via ResizeBuffer().
  uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) override;

  // Resizes a buffer previously returned by AllocateResizableBuffer().
  TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
                            size_t alignment) override;

  // Frees up the memory occupied by the resizable buffer.
  TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) override;

  // Returns a pointer to the start of the overlay memory, which is used for
  // activation tensors and scratch buffers by kernels at the Invoke stage.
  uint8_t* GetOverlayMemoryAddress() const override;

  // Reserves the size of the overlay memory. This overlay is reserved for the
  // kernels at the Invoke stage. It is referred to as the overlay because,
  // before the Invoke stage, the same memory can be used for temp buffers.
  // The layout of the memory is planned by the memory planner separately at
  // the Invoke stage.
  TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
                                                 size_t alignment) override;

  // Returns the size of the non-persistent buffer in use.
  size_t GetNonPersistentUsedBytes() const override;

  // Returns the number of bytes available with a given alignment. This number
  // takes into account any temporary allocations.
  size_t GetAvailableMemory(size_t alignment) const override;

  TF_LITE_REMOVE_VIRTUAL_DELETE

 private:
  // The memory arena that this allocator manages.
  uint8_t* const buffer_head_;
  uint8_t* const buffer_tail_;

  // The whole region is split into two parts:
  // buffer_head_ to head_temp_ - 1 belongs to the only resizable buffer.
  // head_temp_ to buffer_tail_ can be used for (non-resizable) temp buffers.
  uint8_t* head_temp_;

  // next_temp_ points to the next available temp buffer allocation address;
  // its range is between head_temp_ and buffer_tail_.
  uint8_t* next_temp_;

  // XOR checksum of outstanding temp buffer pointers.
  // If all temp buffers are deallocated OR no temp buffers are allocated,
  // temp_buffer_ptr_check_sum_ == 0.
  intptr_t temp_buffer_ptr_check_sum_ = 0;
  // Count of outstanding temp buffers.
  int temp_buffer_count_ = 0;
  bool resizable_buffer_allocated_ = false;
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_

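A hedged usage sketch of the class above: one resizable buffer grows from the arena head, temp buffers stack behind it, and a reset is required before the resizable buffer may change size (the arena size and alignments are illustrative):

#include "tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h"

constexpr size_t kArenaSize = 1024;  // illustrative size
alignas(16) static uint8_t arena[kArenaSize];

void Example() {
  tflite::NonPersistentArenaBufferAllocator allocator(arena, kArenaSize);

  uint8_t* resizable = allocator.AllocateResizableBuffer(256, 16);  // head
  uint8_t* temp = allocator.AllocateTemp(64, 16);  // stacked after head_temp_

  allocator.DeallocateTemp(temp);
  allocator.ResetTempAllocations();                // must precede a resize
  allocator.ResizeBuffer(resizable, 512, 16);      // grow the head region
  allocator.DeallocateResizableBuffer(resizable);  // ResizeBuffer(..., 0, 1)
}
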
tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.cc
@@ -1,52 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h"

#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {

PersistentArenaBufferAllocator::PersistentArenaBufferAllocator(
    uint8_t* buffer, size_t buffer_size)
    : buffer_head_(buffer),
      buffer_tail_(buffer + buffer_size),
      tail_temp_(buffer_tail_) {}

PersistentArenaBufferAllocator::~PersistentArenaBufferAllocator() {}

uint8_t* PersistentArenaBufferAllocator::AllocatePersistentBuffer(
    size_t size, size_t alignment) {
  uint8_t* const aligned_result =
      AlignPointerDown(tail_temp_ - size, alignment);
  if (aligned_result < buffer_head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
    const size_t missing_memory = buffer_head_ - aligned_result;
    MicroPrintf(
        "Failed to allocate tail memory. Requested: %u, "
        "available %u, missing: %u",
        size, size - missing_memory, missing_memory);
#endif
    return nullptr;
  }
  tail_temp_ = aligned_result;
  return aligned_result;
}

size_t PersistentArenaBufferAllocator::GetPersistentUsedBytes() const {
  return buffer_tail_ - tail_temp_;
}

}  // namespace tflite

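The tail-allocation arithmetic above generalizes to any downward-growing bump allocator: align the candidate pointer down so the allocation still ends at the old tail. A standalone sketch (not TFLM code; alignment must be a power of two):

#include <cstddef>
#include <cstdint>

uint8_t* AlignDown(uint8_t* p, size_t alignment) {
  return reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(p) &
                                    ~(alignment - 1));
}

// Mirrors AllocatePersistentBuffer(): move the tail down by `size`, align
// down, and fail if the result would collide with the head of the arena.
uint8_t* AllocateFromTail(uint8_t** tail, uint8_t* head, size_t size,
                          size_t alignment) {
  uint8_t* result = AlignDown(*tail - size, alignment);
  if (result < head) {
    return nullptr;  // arena exhausted
  }
  *tail = result;
  return result;
}
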
tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h
@@ -1,59 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"

namespace tflite {

// PersistentArenaBufferAllocator is an implementation of the
// IPersistentBufferAllocator interface on an arena that is dedicated to
// persistent buffers.
class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
 public:
  PersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
  virtual ~PersistentArenaBufferAllocator();

  // Allocates persistent memory. The persistent buffer is never freed.
  // Returns nullptr if an error occurred.
  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;

  // Returns the size of all persistent allocations in bytes.
  size_t GetPersistentUsedBytes() const override;

  TF_LITE_REMOVE_VIRTUAL_DELETE
 private:
  // The memory arena that this allocator manages.
  uint8_t* const buffer_head_;
  uint8_t* const buffer_tail_;

  // The whole region is split into two parts:
  // tail_temp_ to buffer_tail_ contains allocated buffers;
  // buffer_head_ to tail_temp_ - 1 is still-available space.
  // In essence, the allocated region grows downward from the tail and
  // emulates SingleArenaBufferAllocator's persistent part.
  uint8_t* tail_temp_;
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_

tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.cc
@@ -1,87 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h"

#include <new>

#include "tensorflow/lite/kernels/internal/compatibility.h"

namespace tflite {

RecordingSingleArenaBufferAllocator::RecordingSingleArenaBufferAllocator(
    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size)
    : SingleArenaBufferAllocator(error_reporter, buffer_head, buffer_size),
      requested_head_bytes_(0),
      requested_tail_bytes_(0),
      used_bytes_(0),
      alloc_count_(0) {}

RecordingSingleArenaBufferAllocator::~RecordingSingleArenaBufferAllocator() {}

RecordingSingleArenaBufferAllocator*
RecordingSingleArenaBufferAllocator::Create(ErrorReporter* error_reporter,
                                            uint8_t* buffer_head,
                                            size_t buffer_size) {
  TFLITE_DCHECK(error_reporter != nullptr);
  TFLITE_DCHECK(buffer_head != nullptr);
  RecordingSingleArenaBufferAllocator tmp = RecordingSingleArenaBufferAllocator(
      error_reporter, buffer_head, buffer_size);

  uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
      sizeof(RecordingSingleArenaBufferAllocator),
      alignof(RecordingSingleArenaBufferAllocator));
  // Use the default copy constructor to populate internal states.
  return new (allocator_buffer) RecordingSingleArenaBufferAllocator(tmp);
}

size_t RecordingSingleArenaBufferAllocator::GetRequestedBytes() const {
  return requested_head_bytes_ + requested_tail_bytes_;
}

size_t RecordingSingleArenaBufferAllocator::GetUsedBytes() const {
  return used_bytes_;
}

size_t RecordingSingleArenaBufferAllocator::GetAllocatedCount() const {
  return alloc_count_;
}

TfLiteStatus RecordingSingleArenaBufferAllocator::ResizeBuffer(
    uint8_t* resizable_buf, size_t size, size_t alignment) {
  const uint8_t* previous_head = head();
  TfLiteStatus status =
      SingleArenaBufferAllocator::ResizeBuffer(resizable_buf, size, alignment);
  if (status == kTfLiteOk) {
    used_bytes_ += head() - previous_head;
    requested_head_bytes_ = size;
  }
  return status;
}

uint8_t* RecordingSingleArenaBufferAllocator::AllocatePersistentBuffer(
    size_t size, size_t alignment) {
  const uint8_t* previous_tail = tail();
  uint8_t* result =
      SingleArenaBufferAllocator::AllocatePersistentBuffer(size, alignment);
  if (result != nullptr) {
    used_bytes_ += previous_tail - tail();
    requested_tail_bytes_ += size;
    alloc_count_++;
  }
  return result;
}

}  // namespace tflite

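The Create() factory above uses a notable trick: a temporary allocator on the stack reserves room for a permanent copy of itself inside the arena it manages, then copy-constructs itself into that slot with placement new. A simplified standalone sketch (ignores alignment; not TFLM code):

#include <cstddef>
#include <cstdint>
#include <new>

struct BumpAllocator {
  uint8_t* tail;
  explicit BumpAllocator(uint8_t* end) : tail(end) {}
  uint8_t* Allocate(size_t size) { return tail -= size; }
};

BumpAllocator* Create(uint8_t* buffer, size_t buffer_size) {
  BumpAllocator tmp(buffer + buffer_size);
  // Carve out space for the allocator itself from the arena it manages...
  void* slot = tmp.Allocate(sizeof(BumpAllocator));
  // ...then copy the fully-initialized tmp into that slot.
  return new (slot) BumpAllocator(tmp);
}
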
tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h
@@ -1,64 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_

#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"

namespace tflite {

// Utility class used to log allocations of a SingleArenaBufferAllocator.
// Should only be used in debug/evaluation settings or unit tests to evaluate
// allocation usage.
class RecordingSingleArenaBufferAllocator : public SingleArenaBufferAllocator {
 public:
  RecordingSingleArenaBufferAllocator(ErrorReporter* error_reporter,
                                      uint8_t* buffer_head, size_t buffer_size);
  // TODO(b/157615197): Cleanup constructors/destructor and use factory
  // functions.
  ~RecordingSingleArenaBufferAllocator() override;

  static RecordingSingleArenaBufferAllocator* Create(
      ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size);

  // Returns the number of bytes requested from the head or tail.
  size_t GetRequestedBytes() const;

  // Returns the number of bytes actually allocated from the head or tail.
  // This value will be >= the number of requested bytes due to padding and
  // alignment.
  size_t GetUsedBytes() const;

  // Returns the number of alloc calls from the head or tail.
  size_t GetAllocatedCount() const;

  TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
                            size_t alignment) override;
  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;

 private:
  size_t requested_head_bytes_;
  size_t requested_tail_bytes_;
  size_t used_bytes_;
  size_t alloc_count_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_

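A hedged sketch of the intended workflow: run the model setup once against the recording allocator during development, then size the production arena from the measured usage plus headroom (the probe arena size and wiring are illustrative):

#include "tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

constexpr size_t kProbeArenaSize = 16 * 1024;  // deliberately generous
alignas(16) static uint8_t probe_arena[kProbeArenaSize];

size_t MeasureArenaUsage() {
  tflite::RecordingSingleArenaBufferAllocator* allocator =
      tflite::RecordingSingleArenaBufferAllocator::Create(
          tflite::GetMicroErrorReporter(), probe_arena, kProbeArenaSize);
  // ... perform the allocations under test against `allocator` here ...
  return allocator->GetUsedBytes();  // includes padding, unlike
                                     // GetRequestedBytes()
}
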
tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.cc
@@ -1,209 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>

#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {

SingleArenaBufferAllocator::SingleArenaBufferAllocator(
    ErrorReporter* error_reporter, uint8_t* buffer_head, uint8_t* buffer_tail)
    :
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
      error_reporter_(error_reporter),
#endif
      buffer_head_(buffer_head),
      buffer_tail_(buffer_tail),
      head_(buffer_head),
      tail_(buffer_tail),
      temp_(buffer_head_) {
}

SingleArenaBufferAllocator::SingleArenaBufferAllocator(
    ErrorReporter* error_reporter, uint8_t* buffer, size_t buffer_size)
    : SingleArenaBufferAllocator(error_reporter, buffer, buffer + buffer_size) {
}

/* static */
SingleArenaBufferAllocator* SingleArenaBufferAllocator::Create(
    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
  TFLITE_DCHECK(error_reporter != nullptr);
  TFLITE_DCHECK(buffer_head != nullptr);
  SingleArenaBufferAllocator tmp =
      SingleArenaBufferAllocator(error_reporter, buffer_head, buffer_size);

  // Allocate enough bytes from the buffer to create a
  // SingleArenaBufferAllocator. The new instance will use the current adjusted
  // tail buffer from the tmp allocator instance.
  uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
      sizeof(SingleArenaBufferAllocator), alignof(SingleArenaBufferAllocator));
  // Use the default copy constructor to populate internal states.
  return new (allocator_buffer) SingleArenaBufferAllocator(tmp);
}

SingleArenaBufferAllocator::~SingleArenaBufferAllocator() {}

uint8_t* SingleArenaBufferAllocator::AllocateResizableBuffer(size_t size,
                                                             size_t alignment) {
  // Only supports one resizable buffer, which starts at the buffer head.
  uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
  if (ResizeBuffer(expect_resizable_buf, size, alignment) == kTfLiteOk) {
    return expect_resizable_buf;
  }
  return nullptr;
}

TfLiteStatus SingleArenaBufferAllocator::DeallocateResizableBuffer(
    uint8_t* resizable_buf) {
  return ResizeBuffer(resizable_buf, 0, 1);
}

TfLiteStatus SingleArenaBufferAllocator::ReserveNonPersistentOverlayMemory(
    size_t size, size_t alignment) {
  uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
  return ResizeBuffer(expect_resizable_buf, size, alignment);
}

TfLiteStatus SingleArenaBufferAllocator::ResizeBuffer(uint8_t* resizable_buf,
                                                      size_t size,
                                                      size_t alignment) {
  // Only supports one resizable buffer, which starts at the buffer head.
  uint8_t* expect_resizable_buf = AlignPointerUp(buffer_head_, alignment);
  if (head_ != temp_ || resizable_buf != expect_resizable_buf) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Internal error: either buffer is not resizable or "
        "ResetTempAllocations() is not called before ResizeBuffer().");
    return kTfLiteError;
  }

  uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment);
  const size_t available_memory = tail_ - aligned_result;
  if (available_memory < size) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Failed to resize buffer. Requested: %u, available %u, missing: %u",
        size, available_memory, size - available_memory);
    return kTfLiteError;
  }
  head_ = aligned_result + size;
  temp_ = head_;

  return kTfLiteOk;
}

uint8_t* SingleArenaBufferAllocator::AllocatePersistentBuffer(
    size_t size, size_t alignment) {
  uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
  if (aligned_result < head_) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
    const size_t missing_memory = head_ - aligned_result;
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Failed to allocate tail memory. Requested: %u, "
                         "available %u, missing: %u",
                         size, size - missing_memory, missing_memory);
#endif
    return nullptr;
  }
  tail_ = aligned_result;
  return aligned_result;
}

uint8_t* SingleArenaBufferAllocator::AllocateTemp(size_t size,
                                                  size_t alignment) {
  uint8_t* const aligned_result = AlignPointerUp(temp_, alignment);
  const size_t available_memory = tail_ - aligned_result;
  if (available_memory < size) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Failed to allocate temp memory. Requested: %u, "
                         "available %u, missing: %u",
                         size, available_memory, size - available_memory);
    return nullptr;
  }
  temp_ = aligned_result + size;
  temp_buffer_ptr_check_sum_ ^= (reinterpret_cast<intptr_t>(aligned_result));
  temp_buffer_count_++;
  return aligned_result;
}

void SingleArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) {
  temp_buffer_ptr_check_sum_ ^= (reinterpret_cast<intptr_t>(temp_buf));
  temp_buffer_count_--;
}

bool SingleArenaBufferAllocator::IsAllTempDeallocated() {
  if (temp_buffer_count_ != 0 || temp_buffer_ptr_check_sum_ != 0) {
    MicroPrintf(
        "Number of allocated temp buffers: %d. Checksum passing status: %d",
        temp_buffer_count_, !temp_buffer_ptr_check_sum_);
    return false;
  }
  return true;
}

TfLiteStatus SingleArenaBufferAllocator::ResetTempAllocations() {
  // TODO(b/209453859): enable error check based on IsAllTempDeallocated after
  // all AllocateTemp calls have been paired with DeallocateTemp.
  if (!IsAllTempDeallocated()) {
    MicroPrintf(
        "All temp buffers must be freed before calling ResetTempAllocations()");
    return kTfLiteError;
  }
  temp_ = head_;
  return kTfLiteOk;
}

uint8_t* SingleArenaBufferAllocator::GetOverlayMemoryAddress() const {
  return buffer_head_;
}

size_t SingleArenaBufferAllocator::GetNonPersistentUsedBytes() const {
  return std::max(head_ - buffer_head_, temp_ - buffer_head_);
}

size_t SingleArenaBufferAllocator::GetPersistentUsedBytes() const {
  return buffer_tail_ - tail_;
}

size_t SingleArenaBufferAllocator::GetAvailableMemory(size_t alignment) const {
  uint8_t* const aligned_temp = AlignPointerUp(temp_, alignment);
  uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment);
  return aligned_tail - aligned_temp;
}

size_t SingleArenaBufferAllocator::GetUsedBytes() const {
  return GetPersistentUsedBytes() + GetNonPersistentUsedBytes();
}

size_t SingleArenaBufferAllocator::GetBufferSize() const {
  return buffer_tail_ - buffer_head_;
}

uint8_t* SingleArenaBufferAllocator::head() const { return head_; }

uint8_t* SingleArenaBufferAllocator::tail() const { return tail_; }

}  // namespace tflite

tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h
@@ -1,152 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h"
#include "tensorflow/lite/micro/compatibility.h"

namespace tflite {

// TODO(petewarden): This allocator never frees up or reuses any memory, even
// though we have enough information about lifetimes of the tensors to do so.
// This makes it pretty wasteful, so we should use a more intelligent method.
class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
                                   public IPersistentBufferAllocator {
 public:
  // TODO(b/157615197): Cleanup constructors/destructor and use factory
  // functions.
  SingleArenaBufferAllocator(ErrorReporter* error_reporter,
                             uint8_t* buffer_head, uint8_t* buffer_tail);
  SingleArenaBufferAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
                             size_t buffer_size);
  virtual ~SingleArenaBufferAllocator();

  // Creates a new SingleArenaBufferAllocator from a given buffer head and
  // size.
  static SingleArenaBufferAllocator* Create(ErrorReporter* error_reporter,
                                            uint8_t* buffer_head,
                                            size_t buffer_size);

  // Resizes a buffer previously returned by AllocateResizableBuffer(). In the
  // current implementation, it adjusts the head (lowest address, moving
  // upwards) memory allocation to a given size. Calls to this method will
  // also invalidate all temporary allocation values (it sets the location of
  // temp space at the end of the head section). This call will fail if a
  // chain of allocations through AllocateTemp() has not been cleaned up with
  // a call to ResetTempAllocations().
  virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
                                    size_t alignment) override;

  // Returns a buffer that is resizable via ResizeBuffer(). Only one resizable
  // buffer is currently supported.
  virtual uint8_t* AllocateResizableBuffer(size_t size,
                                           size_t alignment) override;

  // Frees up the memory occupied by the resizable buffer.
  virtual TfLiteStatus DeallocateResizableBuffer(
      uint8_t* resizable_buf) override;

  // Reserves the non-persistent memory that is planned by the memory planner.
  virtual TfLiteStatus ReserveNonPersistentOverlayMemory(
      size_t size, size_t alignment) override;

  // Allocates persistent memory starting at the tail of the arena (highest
  // address and moving downwards).
  virtual uint8_t* AllocatePersistentBuffer(size_t size,
                                            size_t alignment) override;

  // Allocates a temporary buffer from the head of the arena (lowest address
  // and moving upwards) but does not update the actual head allocation size
  // or position. The returned buffer is guaranteed valid until either
  // ResetTempAllocations() or ResizeBuffer() is called. Repeated calls to
  // this function will create a chain of temp allocations. All calls to
  // AllocateTemp() must end with a call to ResetTempAllocations(). If
  // ResizeBuffer() is called before a call to ResetTempAllocations(), it will
  // fail with an error message.
  virtual uint8_t* AllocateTemp(size_t size, size_t alignment) override;

  // Signals that a temporary buffer is no longer needed. This is currently
  // for bookkeeping purposes and the memory region is not immediately
  // available for reuse. Deallocated memory regions are only reclaimed once
  // ResetTempAllocations() is called.
  virtual void DeallocateTemp(uint8_t* buf) override;

  // Returns true if all temporary buffers are already deallocated.
  virtual bool IsAllTempDeallocated() override;

  // Resets a chain of temporary allocations back to the current head of the
  // arena (lowest address).
  virtual TfLiteStatus ResetTempAllocations() override;

  // Returns a pointer to the buffer currently assigned to the head section,
  // i.e. the start of the overlay memory.
  uint8_t* GetOverlayMemoryAddress() const override;

  // Returns the size of the head section in bytes.
  size_t GetNonPersistentUsedBytes() const override;

  // Returns the size of all allocations in the tail section in bytes.
  size_t GetPersistentUsedBytes() const override;

  // Returns the number of bytes available with a given alignment. This number
  // takes into account any temporary allocations.
  size_t GetAvailableMemory(size_t alignment) const override;

  // Returns the number of used bytes in the allocator. This number takes into
  // account any temporary allocations.
  size_t GetUsedBytes() const;

  TF_LITE_REMOVE_VIRTUAL_DELETE

 protected:
  // Returns a pointer to the current end of the head buffer.
  uint8_t* head() const;

  // Returns a pointer to the current end of the tail buffer.
  uint8_t* tail() const;

 private:
  size_t GetBufferSize() const;

#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
  ErrorReporter* error_reporter_;
#endif
  uint8_t* buffer_head_;
  uint8_t* buffer_tail_;
  uint8_t* head_;
  uint8_t* tail_;
  uint8_t* temp_;

  // The combination of the checksum of outstanding temporary buffer pointers
  // AND the count of outstanding temporary buffers provides a low-cost
  // mechanism to audit temporary buffers' allocation and deallocation.
  //
  // XOR checksum of outstanding temp buffer pointers.
  // If all temp buffers are deallocated OR no temp buffers are allocated,
  // temp_buffer_ptr_check_sum_ == 0.
  intptr_t temp_buffer_ptr_check_sum_ = 0;
  // Count of outstanding temp buffers.
  int temp_buffer_count_ = 0;
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_

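A hedged usage sketch of the two-ended scheme this class implements: persistent allocations bump the tail downward while the resizable head region and temp buffers grow upward, failing only when the two ends would meet (sizes and alignments are illustrative):

#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

constexpr size_t kArenaSize = 2048;
alignas(16) static uint8_t arena[kArenaSize];

void Example() {
  tflite::SingleArenaBufferAllocator allocator(
      tflite::GetMicroErrorReporter(), arena, kArenaSize);

  uint8_t* weights = allocator.AllocatePersistentBuffer(256, 16);  // tail end
  uint8_t* overlay = allocator.AllocateResizableBuffer(512, 16);   // head end
  (void)weights;
  allocator.DeallocateResizableBuffer(overlay);
}
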
tensorflow/lite/micro/compatibility.h
@@ -1,32 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
#define TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_

// C++ will automatically create class-specific delete operators for virtual
// objects, which by default call the global delete function. For embedded
// applications we want to avoid this, and won't be calling new/delete on
// these objects, so we need to override the default implementation with one
// that does nothing, to avoid linking in ::operator delete().
// This macro needs to be included in the private section of all subclasses of
// a virtual base class.
#ifdef TF_LITE_STATIC_MEMORY
#define TF_LITE_REMOVE_VIRTUAL_DELETE \
  void operator delete(void* p) {}
#else
#define TF_LITE_REMOVE_VIRTUAL_DELETE
#endif

#endif  // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_

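To see why the macro matters: a virtual destructor makes the compiler emit a "deleting destructor" that references operator delete, and a class-local no-op overload satisfies that reference without pulling the global heap into the binary. A standalone sketch of what the macro expands to inside a class (not TFLM code):

class KernelOpBase {  // hypothetical virtual base
 public:
  virtual ~KernelOpBase() {}

 private:
  // No-op class-specific operator delete: the deleting destructor binds to
  // this overload instead of the global ::operator delete. Objects of this
  // type are only ever constructed with placement new into a static arena.
  void operator delete(void*) {}
};
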
tensorflow/lite/micro/debug_log.cc
@@ -1,50 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Reference implementation of the DebugLog() function that's required for a
// platform to support the TensorFlow Lite for Microcontrollers library. This
// is the only function that's absolutely required to be available on a target
// device, since it's used for communicating test results back to the host so
// that we can verify the implementation is working correctly.
// It's designed to be as easy as possible to supply an implementation though.
// On platforms that have a POSIX stack or C library, it can be written as a
// single call to `fprintf(stderr, "%s", s)` to output a string to the error
// stream of the console, but if there's no OS or C library available, there's
// almost always an equivalent way to write out a string to some serial
// interface that can be used instead. For example on Arm M-series MCUs,
// calling the `bkpt #0xAB` assembler instruction will output the string in r1
// to whatever debug serial connection is available. If you're running mbed,
// you can do the same by creating `Serial pc(USBTX, USBRX)` and then calling
// `pc.printf("%s", s)`.
// To add an equivalent function for your own platform, create your own
// implementation file and place it in a subfolder named after the OS you're
// targeting. For example, see the Cortex M bare metal version in
// tensorflow/lite/micro/bluepill/debug_log.cc or the mbed one in
// tensorflow/lite/micro/mbed/debug_log.cc.

#include "tensorflow/lite/micro/debug_log.h"

#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include <cstdio>
#endif

extern "C" void DebugLog(const char* s) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
  // Reusing TF_LITE_STRIP_ERROR_STRINGS to disable DebugLog completely to get
  // maximum reduction in binary size. This is because we have DebugLog calls
  // via TF_LITE_CHECK that are not stubbed out by TF_LITE_REPORT_ERROR.
  fprintf(stderr, "%s", s);
#endif
}

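For a target without a C library, a hedged port sketch: forward each character to a board-specific transmit routine (uart_putc is an assumed platform function, not part of TFLM):

#include "tensorflow/lite/micro/debug_log.h"

extern "C" void uart_putc(char c);  // assumed board support routine

extern "C" void DebugLog(const char* s) {
  while (*s != '\0') {
    uart_putc(*s++);  // blocking write of one character to the serial port
  }
}
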
tensorflow/lite/micro/debug_log.h
@@ -1,31 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_
#define TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// This function should be implemented by each target platform, and provide a
// way for strings to be output to some text stream. For more information, see
// tensorflow/lite/micro/debug_log.cc.
void DebugLog(const char* s);

#ifdef __cplusplus
}  // extern "C"
#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_MICRO_DEBUG_LOG_H_

@@ -1,107 +0,0 @@
|
||||
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/fake_micro_context.h"
|
||||
|
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_arena_constants.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {
namespace {
// Dummy static variables to allow creation of dummy MicroAllocator.
// All tests are guaranteed to run serially.
static constexpr int KDummyTensorArenaSize = 256;
static uint8_t dummy_tensor_arena[KDummyTensorArenaSize];
}  // namespace

FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors,
                                   SingleArenaBufferAllocator* allocator,
                                   MicroGraph* micro_graph)
    : MicroContext(
          MicroAllocator::Create(dummy_tensor_arena, KDummyTensorArenaSize,
                                 GetMicroErrorReporter()),
          nullptr, micro_graph),
      tensors_(tensors),
      allocator_(allocator) {}

TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) {
  allocated_tensor_count_++;
  return &tensors_[tensor_index];
}

void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
  allocated_tensor_count_--;
}

bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() {
  return !allocated_tensor_count_;
}

TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) {
  TfLiteEvalTensor* eval_tensor =
      reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocateTemp(
          sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
  TFLITE_DCHECK(eval_tensor != nullptr);

  // In unit tests, the TfLiteTensor pointer contains the source of truth for
  // buffers and values:
  eval_tensor->data = tensors_[tensor_index].data;
  eval_tensor->dims = tensors_[tensor_index].dims;
  eval_tensor->type = tensors_[tensor_index].type;
  return eval_tensor;
}

void* FakeMicroContext::AllocatePersistentBuffer(size_t bytes) {
  // FakeMicroContext uses SingleArenaBufferAllocator, which does not
  // automatically apply the buffer alignment like MicroAllocator. The buffer
  // alignment is potentially wasteful but allows the fake_micro_context to
  // work correctly with optimized kernels.
  return allocator_->AllocatePersistentBuffer(bytes,
                                              MicroArenaBufferAlignment());
}

TfLiteStatus FakeMicroContext::RequestScratchBufferInArena(size_t bytes,
                                                           int* buffer_index) {
  TFLITE_DCHECK(buffer_index != nullptr);

  if (scratch_buffer_count_ == kNumScratchBuffers_) {
    MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
                kNumScratchBuffers_);
    return kTfLiteError;
  }

  // For tests, we allocate scratch buffers from the tail and keep them around
  // for the lifetime of the model. This means that the arena size in the tests
  // will be more than what we would have if the scratch buffers could share
  // memory.
  scratch_buffers_[scratch_buffer_count_] = static_cast<uint8_t*>(
      allocator_->AllocatePersistentBuffer(bytes, MicroArenaBufferAlignment()));
  TFLITE_DCHECK(scratch_buffers_[scratch_buffer_count_] != nullptr);

  *buffer_index = scratch_buffer_count_++;
  return kTfLiteOk;
}

void* FakeMicroContext::GetScratchBuffer(int buffer_index) {
  TFLITE_DCHECK(scratch_buffer_count_ <= kNumScratchBuffers_);
  if (buffer_index >= scratch_buffer_count_) {
    return nullptr;
  }
  return scratch_buffers_[buffer_index];
}

}  // namespace tflite
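The two scratch-buffer methods above implement a request-then-redeem protocol: Prepare-time code asks for space and receives an index, and Eval-time code exchanges the index for a pointer. A minimal standalone sketch of the same pattern (plain heap-backed C++; the names here are illustrative, not TFLM API):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Fixed-slot scratch registry, mirroring the fake context above.
class ScratchRegistry {
 public:
  static constexpr int kMaxBuffers = 12;

  // Prepare phase: reserve a buffer, receive an index.
  bool Request(size_t bytes, int* index) {
    if (count_ == kMaxBuffers) return false;
    buffers_[count_] = static_cast<uint8_t*>(std::malloc(bytes));
    if (buffers_[count_] == nullptr) return false;
    *index = count_++;
    return true;
  }

  // Eval phase: redeem the index for the pointer.
  void* Get(int index) const {
    return (index >= 0 && index < count_) ? buffers_[index] : nullptr;
  }

  ~ScratchRegistry() {
    for (int i = 0; i < count_; ++i) std::free(buffers_[i]);
  }

 private:
  int count_ = 0;
  uint8_t* buffers_[kMaxBuffers] = {};
};

int main() {
  ScratchRegistry registry;
  int idx = -1;
  if (registry.Request(64, &idx)) {
    std::printf("scratch %d -> %p\n", idx, registry.Get(idx));
  }
  return 0;
}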
@@ -1,56 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
#define TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_

#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"

namespace tflite {
// A fake of MicroContext for kernel util tests.
class FakeMicroContext : public MicroContext {
 public:
  FakeMicroContext(TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator,
                   MicroGraph* micro_graph);

  void* AllocatePersistentBuffer(size_t bytes) override;
  TfLiteStatus RequestScratchBufferInArena(size_t bytes,
                                           int* buffer_index) override;
  void* GetScratchBuffer(int buffer_index) override;

  TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) override;
  void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override;
  bool IsAllTempTfLiteTensorDeallocated();

  TfLiteEvalTensor* GetEvalTensor(int tensor_index) override;

 private:
  static constexpr int kNumScratchBuffers_ = 12;

  int scratch_buffer_count_ = 0;
  uint8_t* scratch_buffers_[kNumScratchBuffers_];

  TfLiteTensor* tensors_;
  int allocated_tensor_count_ = 0;

  SingleArenaBufferAllocator* allocator_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
@@ -1,84 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/flatbuffer_utils.h"

namespace tflite {

FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size)
    : flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {}

int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const {
  const uint8_t* elem = data_ + i * byte_width_;
  return ::flexbuffers::ReadInt64(elem, byte_width_);
}

uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const {
  const uint8_t* elem = data_ + i * byte_width_;
  return ::flexbuffers::ReadUInt64(elem, byte_width_);
}

int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const {
  return static_cast<int32_t>(ElementAsInt64(i));
}

bool FlexbufferWrapper::ElementAsBool(size_t i) const {
  return static_cast<bool>(ElementAsUInt64(i));
}

double FlexbufferWrapper::ElementAsDouble(size_t i) const {
  const uint8_t* elem = data_ + i * byte_width_;
  return ::flexbuffers::ReadDouble(elem, byte_width_);
}

float FlexbufferWrapper::ElementAsFloat(size_t i) const {
  return static_cast<float>(FlexbufferWrapper::ElementAsDouble(i));
}

// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const SubGraph* subgraph) {
  if (subgraph->operators() != nullptr) {
    return subgraph->operators()->size();
  } else {
    return 0;
  }
}

// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) {
  const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
  return NumSubgraphOperators(subgraph);
}

TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
    const flatbuffers::Vector<int32_t>* flatbuffer_array) {
  // On little-endian machines, TfLiteIntArray happens to have the same memory
  // layout as flatbuffers::Vector<int32_t>, so we can reinterpret_cast the
  // flatbuffer vector and avoid a copy and malloc.
  // TODO(b/188459715): audit this usage of const_cast.
  return const_cast<TfLiteIntArray*>(
      reinterpret_cast<const TfLiteIntArray*>(flatbuffer_array));
}

TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
    const flatbuffers::Vector<float>* flatbuffer_array) {
  // On little-endian machines, TfLiteFloatArray happens to have the same
  // memory layout as flatbuffers::Vector<float>, so we can reinterpret_cast
  // the flatbuffer vector and avoid a copy and malloc.
  // TODO(b/188459715): audit this usage of const_cast.
  return const_cast<TfLiteFloatArray*>(
      reinterpret_cast<const TfLiteFloatArray*>(flatbuffer_array));
}

}  // namespace tflite
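The reinterpret_cast above works because, on little-endian machines, both representations are a 32-bit element count followed immediately by the elements. A standalone sketch of that layout pun with a stand-in type (note that such punning is formally undefined behavior in ISO C++, which is part of what the TODO flags):

#include <cstdint>
#include <cstdio>

struct IntArrayView {  // stand-in for TfLiteIntArray: count, then elements
  int32_t size;
  int32_t data[1];  // elements follow the count contiguously in memory
};

int main() {
  // A length-prefixed buffer, as a little-endian flatbuffers vector lays
  // it out: {count, elem0, elem1, elem2}.
  alignas(4) int32_t buffer[4] = {3, 10, 20, 30};
  const IntArrayView* view = reinterpret_cast<const IntArrayView*>(buffer);
  // Reading past data[0] is exactly the pun being illustrated.
  std::printf("size=%d first=%d last=%d\n", static_cast<int>(view->size),
              static_cast<int>(view->data[0]),
              static_cast<int>(view->data[2]));  // size=3 first=10 last=30
  return 0;
}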
@@ -1,65 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_

#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {
// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
// with the parameter names as map keys and the parameter values as the
// corresponding map values.
// Accessing the map values using the flexbuffers::Map class is inline heavy,
// which can cause the code size to bloat beyond what's reasonable for a micro
// application. Use this class instead, when possible.
// FlexbufferWrapper takes advantage of the following properties of
// flexbuffers::Map:
// 1. It can be viewed as a flexbuffers::Vector of the values.
// 2. The values in the vector are ordered alphabetically by their keys.
// 3. All integer and Boolean values are stored as 64-bit numbers.
// 4. All floating point values are stored as double precision numbers.
// The properties are mentioned in the flexbuffers docs, but we rely on
// a unit test to catch design changes.
class FlexbufferWrapper : public flexbuffers::Vector {
 public:
  // Construct with a serialized flexbuffer 'buffer' of 'size' bytes
  explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
  int64_t ElementAsInt64(size_t i) const;
  uint64_t ElementAsUInt64(size_t i) const;
  int32_t ElementAsInt32(size_t i) const;
  bool ElementAsBool(size_t i) const;
  double ElementAsDouble(size_t i) const;
  float ElementAsFloat(size_t i) const;
};

// Returns the number of operators in a tflite subgraph.
uint32_t NumSubgraphOperators(const SubGraph* subgraph);
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);

// Converts a flatbuffer array to a TfLiteArray.
// TODO(b/188459715): These functions convert a const input to a non-const via
// a const_cast. It is unclear exactly why this is required.
TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
    const flatbuffers::Vector<int32_t>* flatbuffer_array);
TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
    const flatbuffers::Vector<float>* flatbuffer_array);

}  // namespace tflite

#endif  // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
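A sketch of reading kernel init parameters through FlexbufferWrapper, leaning on property 2 above: since map values are ordered alphabetically by key, "stride" lands at index 0 and "use_bias" at index 1. The parameter names are invented for illustration; the Builder calls follow the upstream flexbuffers API.

#include <cstdint>
#include <vector>

#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"

int main() {
  // Pack two hypothetical parameters the way a model converter would.
  flexbuffers::Builder fbb;
  fbb.Map([&]() {
    fbb.Int("stride", 2);        // key "stride"
    fbb.Bool("use_bias", true);  // key "use_bias"
  });
  fbb.Finish();
  const std::vector<uint8_t>& buffer = fbb.GetBuffer();

  // Keys sort alphabetically: "stride" < "use_bias", so index 0 is stride.
  tflite::FlexbufferWrapper wrapper(buffer.data(), buffer.size());
  const int32_t stride = wrapper.ElementAsInt32(0);
  const bool use_bias = wrapper.ElementAsBool(1);
  return (stride == 2 && use_bias) ? 0 : 1;
}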
@@ -1,57 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_

#include <algorithm>
#include <cmath>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/max.h"
#include "tensorflow/lite/kernels/internal/min.h"

namespace tflite {
namespace ops {
namespace micro {

// Returns the floating point value for a fused activation:
inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
  switch (act) {
    case kTfLiteActNone:
      return a;
    case kTfLiteActRelu:
      return TfLiteMax(0.0f, a);
    case kTfLiteActReluN1To1:
      return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
    case kTfLiteActRelu6:
      return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
    case kTfLiteActTanh:
      return std::tanh(a);
    case kTfLiteActSignBit:
      return std::signbit(a);
    case kTfLiteActSigmoid:
      return 1.0f / (1.0f + std::exp(-a));
  }
  return 0.0f;  // To indicate an unsupported activation (i.e. when a new fused
                // activation is added to the enum and not handled here).
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATION_UTILS_H_
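A few spot checks of the helper above, assuming the header as defined (the enumerators come from builtin_op_data.h, which it includes):

#include <cassert>

#include "tensorflow/lite/micro/kernels/activation_utils.h"

int main() {
  using tflite::ops::micro::ActivationValFloat;
  assert(ActivationValFloat(kTfLiteActRelu, -2.0f) == 0.0f);
  assert(ActivationValFloat(kTfLiteActRelu6, 7.5f) == 6.0f);
  assert(ActivationValFloat(kTfLiteActReluN1To1, -2.0f) == -1.0f);
  // signbit is true for negatives; the bool converts to 1.0f.
  assert(ActivationValFloat(kTfLiteActSignBit, -3.0f) == 1.0f);
  return 0;
}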
@@ -1,120 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/activations.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace {

void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}

TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);

  switch (input->type) {
    case kTfLiteFloat32: {
      ReluFloat(tflite::micro::GetTensorShape(input),
                tflite::micro::GetTensorData<float>(input),
                tflite::micro::GetTensorShape(output),
                tflite::micro::GetTensorData<float>(output));

      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input),
                            tflite::micro::GetTensorShape(output),
                            tflite::micro::GetTensorData<int8_t>(input),
                            tflite::micro::GetTensorData<int8_t>(output));
      return kTfLiteOk;
    }
    default: {
      MicroPrintf("Only float32 and int8 are supported currently, got %s",
                  TfLiteTypeGetName(input->type));
      return kTfLiteError;
    }
  }
}

void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}

TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);

  switch (input->type) {
    case kTfLiteFloat32: {
      Relu6Float(tflite::micro::GetTensorShape(input),
                 tflite::micro::GetTensorData<float>(input),
                 tflite::micro::GetTensorShape(output),
                 tflite::micro::GetTensorData<float>(output));

      return kTfLiteOk;
    }
    case kTfLiteInt8: {
      Relu6Quantized(data.zero_int8, data.six_int8,
                     tflite::micro::GetTensorShape(input),
                     tflite::micro::GetTensorData<int8_t>(input),
                     tflite::micro::GetTensorShape(output),
                     tflite::micro::GetTensorData<int8_t>(output));
      return kTfLiteOk;
    }
    default: {
      MicroPrintf("Only float32 and int8 are supported currently, got %s",
                  TfLiteTypeGetName(input->type));
      return kTfLiteError;
    }
  }
}

}  // namespace

TfLiteRegistration Register_RELU() {
  return tflite::micro::RegisterOp(ReluInit, ReluPrepare, ReluEval);
}

TfLiteRegistration Register_RELU6() {
  return tflite::micro::RegisterOp(Relu6Init, Relu6Prepare, Relu6Eval);
}

}  // namespace tflite
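A sketch of wiring these registrations into an application, assuming the AddRelu/AddRelu6 convenience methods provided by MicroMutableOpResolver (which forward to Register_RELU/Register_RELU6); applications that care about code size list only the ops their model uses:

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Registers just the two activation kernels defined above.
void RegisterActivationOps(tflite::MicroMutableOpResolver<2>& resolver) {
  resolver.AddRelu();   // wires in Register_RELU()
  resolver.AddRelu6();  // wires in Register_RELU6()
}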
@@ -1,63 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

extern const int kActivationsInputTensor;
extern const int kActivationsOutputTensor;

struct ReluOpData {
  ReluParams params;
};

struct Relu6OpData {
  int8_t six_int8;
  int8_t zero_int8;
};

void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
                   const RuntimeShape& output_shape, const int8_t* input_data,
                   int8_t* output_data);

template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
                         ReluOpData* data);

void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
               const RuntimeShape& output_shape, float* output_data);

void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
                const RuntimeShape& output_shape, float* output_data);

void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
                    const int8_t* input_data, const RuntimeShape& output_shape,
                    int8_t* output_data);

TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node);

TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
@@ -1,158 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

const int kActivationsInputTensor = 0;
const int kActivationsOutputTensor = 0;

void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
                   const RuntimeShape& output_shape, const int8_t* input_data,
                   int8_t* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    const int32_t val = static_cast<int32_t>(input_data[i]);
    int32_t clamped =
        data.params.output_offset +
        MultiplyByQuantizedMultiplier(val - data.params.input_offset,
                                      data.params.output_multiplier,
                                      data.params.output_shift);
    clamped = std::max(data.params.quantized_activation_min, clamped);
    clamped = std::min(data.params.quantized_activation_max, clamped);
    output_data[i] = static_cast<int8_t>(clamped);
  }
}

template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
                         ReluOpData* data) {
  float act_min = 0.0;
  float act_max = std::numeric_limits<float>::infinity();
  double real_multiplier =
      static_cast<double>(input->params.scale / output->params.scale);

  const RuntimeShape input_shape = GetTensorShape(input);
  const RuntimeShape output_shape = GetTensorShape(output);

  QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
                     &data->params.output_shift);

  data->params.quantized_activation_min = std::max(
      static_cast<int32_t>(std::numeric_limits<T>::min()),
      output->params.zero_point +
          static_cast<int32_t>(roundf(act_min / output->params.scale)));
  data->params.quantized_activation_max =
      act_max == std::numeric_limits<float>::infinity()
          ? static_cast<int32_t>(std::numeric_limits<T>::max())
          : std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
                     output->params.zero_point +
                         static_cast<int32_t>(
                             roundf(act_max / output->params.scale)));
  data->params.input_offset = input->params.zero_point;
  data->params.output_offset = output->params.zero_point;
}

void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
               const RuntimeShape& output_shape, float* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    const float val = input_data[i];
    const float lower = 0.0f;
    const float clamped = val < lower ? lower : val;
    output_data[i] = clamped;
  }
}

void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
                const RuntimeShape& output_shape, float* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    const float val = input_data[i];
    const float upper = 6.0f;
    const float lower = 0.0f;
    const float clamped = val > upper ? upper : val < lower ? lower : val;
    output_data[i] = clamped;
  }
}

void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
                    const int8_t* input_data, const RuntimeShape& output_shape,
                    int8_t* output_data) {
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  for (int i = 0; i < flat_size; ++i) {
    const int8_t val = input_data[i];
    const int8_t clamped = val > upper ? upper : val < lower ? lower : val;
    output_data[i] = clamped;
  }
}

TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  ReluOpData* data = static_cast<ReluOpData*>(node->user_data);

  MicroContext* micro_context = GetMicroContext(context);
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kActivationsInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kActivationsOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  if (input->type == kTfLiteInt8) {
    CalculateReluOpData<int8_t>(input, output, data);
  }

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);

  MicroContext* micro_context = GetMicroContext(context);
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kActivationsInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);

  if (input->type == kTfLiteInt8) {
    data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
                                                  input->params.zero_point);
    data->zero_int8 = input->params.zero_point;
  }

  micro_context->DeallocateTempTfLiteTensor(input);

  return kTfLiteOk;
}

}  // namespace tflite
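The int8 path above is easier to follow with concrete numbers. A standalone re-derivation of ReluQuantized's rescale-then-clamp step, with invented quantization parameters and plain double arithmetic standing in for the fixed-point MultiplyByQuantizedMultiplier:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  // Made-up parameters: real = scale * (q - zero_point).
  const double input_scale = 0.5, output_scale = 0.25;
  const int32_t input_zero_point = 0, output_zero_point = -128;
  const int32_t type_min = -128, type_max = 127;

  const int8_t input[] = {-10, 0, 6};  // reals: -5.0, 0.0, 3.0
  for (int8_t q : input) {
    const double real = input_scale * (q - input_zero_point);
    // What the kernel computes: rescale within the quantized domain...
    int32_t requant =
        output_zero_point +
        static_cast<int32_t>((input_scale / output_scale) *
                             (q - input_zero_point));
    requant = std::min(std::max(requant, type_min), type_max);
    // ...then clamp to quantized_activation_min, which already encodes the
    // ReLU floor (real 0.0 quantizes to the output zero point).
    requant = std::max(requant, output_zero_point);
    // Prints requant -128, -128, -116, i.e. reals 0.0, 0.0, 3.0 = relu(real).
    std::printf("q=%4d real=%5.2f requant=%5d\n", q, real, requant);
  }
  return 0;
}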
@@ -1,165 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/add.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {

void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
             const OpDataAdd* data, const TfLiteEvalTensor* input1,
             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params;
  SetActivationParams(data->output_activation_min_f32,
                      data->output_activation_max_f32, &op_params);
  if (data->requires_broadcast) {
    reference_ops::BroadcastAdd4DSlow(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
  } else {
    reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
                       tflite::micro::GetTensorData<float>(input1),
                       tflite::micro::GetTensorShape(input2),
                       tflite::micro::GetTensorData<float>(input2),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));
  }
}

TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLiteAddParams* params, const OpDataAdd* data,
                              const TfLiteEvalTensor* input1,
                              const TfLiteEvalTensor* input2,
                              TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params;
  op_params.left_shift = data->left_shift;
  op_params.input1_offset = data->input1_offset;
  op_params.input1_multiplier = data->input1_multiplier;
  op_params.input1_shift = data->input1_shift;
  op_params.input2_offset = data->input2_offset;
  op_params.input2_multiplier = data->input2_multiplier;
  op_params.input2_shift = data->input2_shift;
  op_params.output_offset = data->output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = data->output_shift;
  SetActivationParams(data->output_activation_min, data->output_activation_max,
                      &op_params);
  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  switch (output->type) {
    case kTfLiteInt8: {
      if (need_broadcast) {
        reference_integer_ops::BroadcastAdd4DSlow(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      } else {
        reference_integer_ops::Add(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      }
      break;
    }
    case kTfLiteInt16: {
      if (need_broadcast) {
        reference_ops::BroadcastAdd4DSlow(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int16_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int16_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int16_t>(output));
      } else {
        reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
                           tflite::micro::GetTensorData<int16_t>(input1),
                           tflite::micro::GetTensorShape(input2),
                           tflite::micro::GetTensorData<int16_t>(input2),
                           tflite::micro::GetTensorShape(output),
                           tflite::micro::GetTensorData<int16_t>(output),
                           false);
      }
      break;
    }
    default:
      MicroPrintf("Type %s (%d) not supported.",
                  TfLiteTypeGetName(output->type), output->type);
      return kTfLiteError;
  }

  return kTfLiteOk;
}

void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
}

TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);

  if (output->type == kTfLiteFloat32) {
    EvalAdd(context, node, params, data, input1, input2, output);
  } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
    TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
                                                input1, input2, output));
  } else {
    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
                output->type);
    return kTfLiteError;
  }

  return kTfLiteOk;
}

TfLiteRegistration Register_ADD() {
  return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval);
}

}  // namespace tflite
@@ -1,106 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <algorithm>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"

namespace tflite {

const int kAddInputTensor1 = 0;
const int kAddInputTensor2 = 1;
const int kAddOutputTensor = 0;

TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params,
                                const TfLiteTensor* input1,
                                const TfLiteTensor* input2,
                                TfLiteTensor* output, OpDataAdd* data) {
  data->requires_broadcast = !HaveSameShapes(input1, input2);

  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
    // 8bit -> 8bit general quantized path, with general rescalings
    data->input1_offset = -input1->params.zero_point;
    data->input2_offset = -input2->params.zero_point;
    data->output_offset = output->params.zero_point;
    data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
    const double twice_max_input_scale =
        2 * static_cast<double>(
                std::max(input1->params.scale, input2->params.scale));
    const double real_input1_multiplier =
        static_cast<double>(input1->params.scale) / twice_max_input_scale;
    const double real_input2_multiplier =
        static_cast<double>(input2->params.scale) / twice_max_input_scale;
    const double real_output_multiplier =
        twice_max_input_scale /
        ((1 << data->left_shift) * static_cast<double>(output->params.scale));

    QuantizeMultiplierSmallerThanOneExp(
        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);

    QuantizeMultiplierSmallerThanOneExp(
        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);

    QuantizeMultiplierSmallerThanOneExp(
        real_output_multiplier, &data->output_multiplier, &data->output_shift);

    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));
  } else if (output->type == kTfLiteFloat32) {
    CalculateActivationRange(params->activation,
                             &data->output_activation_min_f32,
                             &data->output_activation_max_f32);
  }

  return kTfLiteOk;
}

TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  MicroContext* micro_context = GetMicroContext(context);
  TfLiteTensor* input1 =
      micro_context->AllocateTempInputTensor(node, kAddInputTensor1);
  TF_LITE_ENSURE(context, input1 != nullptr);
  TfLiteTensor* input2 =
      micro_context->AllocateTempInputTensor(node, kAddInputTensor2);
  TF_LITE_ENSURE(context, input2 != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kAddOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  OpDataAdd* data = static_cast<OpDataAdd*>(node->user_data);
  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);

  TF_LITE_ENSURE_STATUS(
      CalculateOpDataAdd(context, params, input1, input2, output, data));

  micro_context->DeallocateTempTfLiteTensor(input1);
  micro_context->DeallocateTempTfLiteTensor(input2);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

}  // namespace tflite
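The multiplier setup in CalculateOpDataAdd encodes one identity: both inputs are rescaled onto a common grid of twice the larger input scale (pre-shifted left to preserve precision), summed, then rescaled to the output grid. A standalone numeric check with invented scales, using doubles in place of the fixed-point multiplier/shift pairs:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // Made-up quantization parameters: real = scale * (q - zero_point).
  const double s1 = 0.1, s2 = 0.05, so = 0.2;
  const int32_t z1 = 0, z2 = 0, zo = 0;
  const int left_shift = 20;  // the int8 value chosen above

  // The multipliers CalculateOpDataAdd derives (plain doubles here).
  const double twice_max = 2.0 * std::max(s1, s2);
  const double m1 = s1 / twice_max;
  const double m2 = s2 / twice_max;
  const double mo = twice_max / ((1 << left_shift) * so);

  // Add 3.0 (q1=30) and -2.0 (q2=-40); expect 1.0, i.e. q_out = 5.
  const int32_t q1 = 30, q2 = -40;
  const double scaled1 =
      static_cast<double>((q1 - z1) * (1 << left_shift)) * m1;
  const double scaled2 =
      static_cast<double>((q2 - z2) * (1 << left_shift)) * m2;
  const int32_t q_out =
      zo + static_cast<int32_t>(std::lround((scaled1 + scaled2) * mo));
  std::printf("q_out=%d (real %.2f)\n", q_out, so * (q_out - zo));  // 5, 1.00
  return 0;
}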
@@ -1,101 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <stddef.h>

#include <cstring>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_resource_variable.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace {

constexpr int kInputVariableId = 0;
constexpr int kInputValue = 1;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0);

  // This must be a TfLiteEvalTensor despite this being in Prepare, because
  // CreateTensor allocates a temp tensor from the flatbuffer, which does not
  // contain the correct ID generated within the VAR_HANDLE op. EvalTensors are
  // all allocated during StartModelAllocation which happens before
  // init/prepare, and VAR_HANDLE Prepare() references its own op_data in the
  // TfLiteEvalTensor, so reading the ID here is valid.
  const TfLiteEvalTensor* input_resource_id_tensor =
      tflite::micro::GetEvalInput(context, node, kInputVariableId);
  TFLITE_DCHECK(input_resource_id_tensor != nullptr);
  TF_LITE_ENSURE(context, (input_resource_id_tensor->type == kTfLiteResource ||
                           input_resource_id_tensor->type == kTfLiteInt32));
  TF_LITE_ENSURE_EQ(context, NumElements(input_resource_id_tensor->dims), 1);

  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  TfLiteTensor* input_value =
      micro_context->AllocateTempInputTensor(node, kInputValue);
  TFLITE_DCHECK(input_value != nullptr);

  MicroGraph& graph_info = micro_context->graph();

  MicroResourceVariables* resources = graph_info.GetResourceVariables();
  TF_LITE_ENSURE_OK(context,
                    resources->Allocate(input_resource_id_tensor->data.i32[0],
                                        context, input_value));

  micro_context->DeallocateTempTfLiteTensor(input_value);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input_id =
      tflite::micro::GetEvalInput(context, node, kInputVariableId);
  TFLITE_DCHECK(input_id != nullptr);

  const TfLiteEvalTensor* input_value =
      tflite::micro::GetEvalInput(context, node, kInputValue);
  TFLITE_DCHECK(input_value != nullptr);

  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  MicroGraph& graph_info = micro_context->graph();

  MicroResourceVariables* resources = graph_info.GetResourceVariables();
  if (resources == nullptr) {
    MicroPrintf(
        "ASSIGN_VARIABLE requires resource variables. Please create "
        "ResourceVariables and pass it to the interpreter.");
    return kTfLiteError;
  }
  TF_LITE_ENSURE_OK(context,
                    resources->Assign(input_id->data.i32[0], input_value));
  return kTfLiteOk;
}

}  // namespace.

TfLiteRegistration Register_ASSIGN_VARIABLE() {
  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
}

}  // namespace tflite
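ASSIGN_VARIABLE resolves its target through a graph-owned registry keyed by the int32 id that VAR_HANDLE minted. A simplified standalone sketch of that id-to-storage indirection (heap-backed here; the real MicroResourceVariables allocates from the arena):

#include <cstdint>
#include <cstring>
#include <vector>

// Simplified stand-in for MicroResourceVariables: VAR_HANDLE mints an id,
// ASSIGN_VARIABLE copies a value into the storage behind that id.
class ResourceRegistry {
 public:
  int Allocate(size_t bytes) {  // Prepare-time, as above
    storage_.emplace_back(bytes, 0);
    return static_cast<int>(storage_.size()) - 1;
  }
  bool Assign(int id, const void* value, size_t bytes) {  // Eval-time
    if (id < 0 || id >= static_cast<int>(storage_.size()) ||
        bytes != storage_[id].size()) {
      return false;
    }
    std::memcpy(storage_[id].data(), value, bytes);
    return true;
  }

 private:
  std::vector<std::vector<uint8_t>> storage_;
};

int main() {
  ResourceRegistry resources;
  const int id = resources.Allocate(sizeof(float));  // VAR_HANDLE's job
  const float value = 42.0f;
  return resources.Assign(id, &value, sizeof(value)) ? 0 : 1;
}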
@@ -1,91 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/broadcast_args.h"

#include <stdint.h>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"

namespace tflite {
namespace {
constexpr int kShape1Tensor = 0;
constexpr int kShape2Tensor = 1;
constexpr int kOutputTensor = 0;

TfLiteStatus BroadcastArgsPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, NumInputs(node) == 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  MicroContext* micro_context = GetMicroContext(context);
  TfLiteTensor* shape1 =
      micro_context->AllocateTempInputTensor(node, kShape1Tensor);
  TfLiteTensor* shape2 =
      micro_context->AllocateTempInputTensor(node, kShape2Tensor);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kOutputTensor);

  TF_LITE_ENSURE(context,
                 shape1->type == kTfLiteInt32 || shape1->type == kTfLiteInt64);
  TF_LITE_ENSURE_EQ(context, shape1->type, shape2->type);
  TF_LITE_ENSURE_EQ(context, shape1->type, output->type);

  // Ensure the shapes are 1D tensors.
  TF_LITE_ENSURE_EQ(context, NumDimensions(shape1), 1);
  TF_LITE_ENSURE_EQ(context, NumDimensions(shape2), 1);

  // Ensure the shape of the output tensor is compatible.
  TF_LITE_ENSURE_EQ(context, NumDimensions(output), 1);

  micro_context->DeallocateTempTfLiteTensor(shape1);
  micro_context->DeallocateTempTfLiteTensor(shape2);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

TfLiteStatus BroadcastArgsEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* shape1 =
      micro::GetEvalInput(context, node, kShape1Tensor);
  const TfLiteEvalTensor* shape2 =
      micro::GetEvalInput(context, node, kShape2Tensor);
  TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);

  if (output->type == kTfLiteInt32) {
    reference_ops::BroadcastArgs(
        micro::GetTensorShape(shape1), micro::GetTensorData<int32_t>(shape1),
        micro::GetTensorShape(shape2), micro::GetTensorData<int32_t>(shape2),
        micro::GetTensorShape(output), micro::GetTensorData<int32_t>(output));
  } else {
    reference_ops::BroadcastArgs(
        micro::GetTensorShape(shape1), micro::GetTensorData<int64_t>(shape1),
        micro::GetTensorShape(shape2), micro::GetTensorData<int64_t>(shape2),
        micro::GetTensorShape(output), micro::GetTensorData<int64_t>(output));
  }

  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_BROADCAST_ARGS() {
  return tflite::micro::RegisterOp(nullptr, BroadcastArgsPrepare,
                                   BroadcastArgsEval);
}

}  // namespace tflite
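BROADCAST_ARGS computes the NumPy-style broadcast of two shape vectors: dimensions are compared right-aligned, and a 1 stretches to match the other side. A standalone sketch of the rule:

#include <algorithm>
#include <cstdio>
#include <vector>

// Right-aligned broadcast of two shape vectors, as BROADCAST_ARGS computes.
// Returns an empty vector when the shapes are not broadcastable.
std::vector<int> BroadcastShapes(const std::vector<int>& a,
                                 const std::vector<int>& b) {
  const size_t out_rank = std::max(a.size(), b.size());
  std::vector<int> out(out_rank, 1);
  for (size_t i = 0; i < out_rank; ++i) {  // i counts from the right
    const int da = i < a.size() ? a[a.size() - 1 - i] : 1;
    const int db = i < b.size() ? b[b.size() - 1 - i] : 1;
    if (da != db && da != 1 && db != 1) return {};  // incompatible dims
    out[out_rank - 1 - i] = (da == 1) ? db : da;
  }
  return out;
}

int main() {
  const std::vector<int> result = BroadcastShapes({2, 1, 3}, {4, 3});
  for (int d : result) std::printf("%d ", d);  // prints: 2 4 3
  std::printf("\n");
  return 0;
}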
@@ -1,123 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h"

#include <stdint.h>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"

namespace tflite {

namespace {
constexpr int kInputTensor = 0;
constexpr int kShapeTensor = 1;
constexpr int kOutputTensor = 0;
// Support a maximum of 5 dimensions in TFLM.
constexpr int kMaxDims = 5;

TfLiteStatus ValidateOutputTensor(TfLiteContext* context, TfLiteTensor* input,
                                  TfLiteTensor* shape, TfLiteTensor* output) {
  // Ensure the shape is a 1D tensor.
  TF_LITE_ENSURE_EQ(context, NumDimensions(shape), 1);

  // Ensure output dims are not fewer than input dims.
  int input_num_dims = NumDimensions(input);
  int output_num_dims = NumDimensions(output);
  int shape_num_dims = SizeOfDimension(shape, 0);
  TF_LITE_ENSURE_MSG(context, output_num_dims == shape_num_dims,
                     "Output must match with the expected shape dimension.");
  TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims,
                     "Output shape must be broadcastable from input shape.");
  TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims,
                     "BroadcastTo only supports 1-5D tensor.");

  // Check if output shape is broadcastable from input shape.
  auto get_shape_data = [shape](int i) -> int32_t {
    if (shape->type == kTfLiteInt32) {
      return GetTensorData<int32_t>(shape)[i];
    } else {
      return GetTensorData<int64_t>(shape)[i];
    }
  };

  int extending_dims = output_num_dims - input_num_dims;
  for (int idx = 0; idx < input_num_dims; ++idx) {
    TF_LITE_ENSURE_MSG(
        context,
        (SizeOfDimension(input, idx) == 1 ||
         SizeOfDimension(input, idx) == get_shape_data(extending_dims + idx)),
        "Output shape must be broadcastable from input shape.");
  }

  // Validate the shape of the output tensor.
  tflite::RuntimeShape output_shape = tflite::GetTensorShape(output);
  for (int idx = 0; idx < output_num_dims; ++idx) {
    TF_LITE_ENSURE(context, output_shape.Dims(idx) == get_shape_data(idx));
  }
  return kTfLiteOk;
}

TfLiteStatus BroadcastToPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, NumInputs(node) == 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  MicroContext* micro_context = GetMicroContext(context);
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kInputTensor);
  TfLiteTensor* shape =
      micro_context->AllocateTempInputTensor(node, kShapeTensor);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kOutputTensor);

  TF_LITE_ENSURE_MSG(context, (NumDimensions(input) <= kMaxDims),
                     "BroadcastTo only supports 1-5D tensor.");

  TF_LITE_ENSURE(context,
                 shape->type == kTfLiteInt32 || shape->type == kTfLiteInt64);
  TF_LITE_ENSURE_EQ(context, input->type, output->type);

  // Does not support String type due to its variable size. This limitation is
  // the same as TFLite.
  TF_LITE_ENSURE(context, input->type != kTfLiteString);

  TF_LITE_ENSURE_STATUS(ValidateOutputTensor(context, input, shape, output));
  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(shape);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

TfLiteStatus BroadcastToEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);

  // The BroadcastTo op supports up to 5 dims, unlike the 8 dims in TFLite.
  reference_ops::BroadcastTo<kMaxDims>(
      micro::GetTensorShape(input), input->data.raw,
      micro::GetTensorShape(output), output->data.raw, input->type);
  return kTfLiteOk;
}
}  // namespace

TfLiteRegistration Register_BROADCAST_TO() {
  return tflite::micro::RegisterOp(nullptr, BroadcastToPrepare,
                                   BroadcastToEval);
}

}  // namespace tflite
@@ -1,88 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <stddef.h>

#include <cstring>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace {

struct OpData {
  int init_subgraph_index;
  bool has_run;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
  const auto* params =
      reinterpret_cast<const TfLiteCallOnceParams*>(node->builtin_data);
  op_data->init_subgraph_index = params->init_subgraph_index;
  op_data->has_run = false;

  TF_LITE_ENSURE(context, NumInputs(node) == 0);
  TF_LITE_ENSURE(context, NumOutputs(node) == 0);

  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  MicroGraph& graph_info = micro_context->graph();

  TF_LITE_ENSURE(context,
                 op_data->init_subgraph_index < graph_info.NumSubgraphs());

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);

  // Call once only runs one time then is a no-op for every subsequent call.
  if (op_data->has_run) {
    return kTfLiteOk;
  }

  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  MicroGraph& graph_info = micro_context->graph();

  TF_LITE_ENSURE_OK(context,
                    graph_info.InvokeSubgraph(op_data->init_subgraph_index));

  op_data->has_run = true;

  return kTfLiteOk;
}

}  // namespace.

TfLiteRegistration Register_CALL_ONCE() {
  return tflite::micro::RegisterOp(Init, Prepare, Eval);
}

}  // namespace tflite
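The has_run flag is the whole mechanism: the init subgraph runs on the first Eval, and every later call is a no-op. A standalone sketch of the same single-threaded run-once latch (no std::call_once needed, since the TFLM interpreter is single-threaded):

#include <cstdio>

// Single-threaded run-once latch, mirroring the CALL_ONCE kernel's OpData.
class CallOnce {
 public:
  template <typename F>
  void Invoke(F&& init) {
    if (has_run_) return;  // no-op on every subsequent call
    init();
    has_run_ = true;
  }

 private:
  bool has_run_ = false;
};

int main() {
  CallOnce once;
  for (int i = 0; i < 3; ++i) {
    once.Invoke([] { std::printf("init subgraph runs exactly once\n"); });
  }
  return 0;
}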
@@ -1,114 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <algorithm>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {
namespace {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

template <typename FromT, typename ToT>
void copyCast(const FromT* in, ToT* out, int num_elements) {
  std::transform(in, in + num_elements, out,
                 [](FromT a) { return static_cast<ToT>(a); });
}

template <typename FromT>
TfLiteStatus copyToTensor(TfLiteContext* context, const FromT* in,
                          TfLiteEvalTensor* out, int num_elements) {
  switch (out->type) {
    case kTfLiteInt8:
      copyCast(in, out->data.int8, num_elements);
      break;
    case kTfLiteInt16:
      copyCast(in, out->data.i16, num_elements);
      break;
    case kTfLiteInt32:
      copyCast(in, out->data.i32, num_elements);
      break;
    case kTfLiteFloat32:
      copyCast(in, tflite::micro::GetTensorData<float>(out), num_elements);
      break;
    default:
      // Unsupported type.
      MicroPrintf("Output type %s (%d) not supported.",
                  TfLiteTypeGetName(out->type), out->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  int num_elements = MatchingFlatSize(tflite::micro::GetTensorShape(input),
                                      tflite::micro::GetTensorShape(output));

  switch (input->type) {
    case kTfLiteInt8:
      return copyToTensor(context, input->data.int8, output, num_elements);
    case kTfLiteInt16:
      return copyToTensor(context,
                          tflite::micro::GetTensorData<int16_t>(input), output,
                          num_elements);
    case kTfLiteInt32:
      return copyToTensor(context,
                          tflite::micro::GetTensorData<int32_t>(input), output,
                          num_elements);
    case kTfLiteUInt32:
      return copyToTensor(context,
                          tflite::micro::GetTensorData<uint32_t>(input), output,
                          num_elements);
    case kTfLiteFloat32:
      return copyToTensor(context, tflite::micro::GetTensorData<float>(input),
                          output, num_elements);
    default:
      // Unsupported type.
      MicroPrintf("Input type %s (%d) not supported.",
                  TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
  }
}
}  // namespace

TfLiteRegistration Register_CAST() {
  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
}

}  // namespace tflite
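
The two-level dispatch above (Eval selects FromT, copyToTensor selects ToT)
instantiates copyCast once per supported type pair. The same element-wise
pattern, shown standalone on raw arrays for one pair:

#include <algorithm>
#include <cstdint>

// Element-wise widening cast, as copyCast<int8_t, float> performs above.
void CastInt8ToFloat(const int8_t* in, float* out, int num_elements) {
  std::transform(in, in + num_elements, out,
                 [](int8_t v) { return static_cast<float>(v); });
}
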
@@ -1,75 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/ceil.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace ceil {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
  TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
  TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
  for (int i = 0; i < output->dims->size; ++i) {
    TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
  }
  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  reference_ops::Ceil(tflite::micro::GetTensorShape(input),
                      tflite::micro::GetTensorData<float>(input),
                      tflite::micro::GetTensorShape(output),
                      tflite::micro::GetTensorData<float>(output));

  return kTfLiteOk;
}
}  // namespace ceil

TfLiteRegistration Register_CEIL() {
  return tflite::micro::RegisterOp(nullptr, ceil::Prepare, ceil::Eval);
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
@@ -1,48 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

namespace tflite {

// The CircularBuffer op has one input and one output tensor.
extern const int kCircularBufferInputTensor;
extern const int kCircularBufferOutputTensor;

// Indices into the init flexbuffer's vector.
// The parameter's name is in the comment that follows.
// Elements in the vectors are ordered alphabetically by parameter name.
extern const int kCircularBufferCyclesMaxIndex;  // 'cycles_max'

// TODO(b/149795762): Add this to TfLiteStatus enum.
extern const TfLiteStatus kTfLiteAbort;

// These fields control the stride period of a strided streaming model. This op
// returns kTfLiteAbort until cycles_until_run-- is zero. At this time,
// cycles_until_run is reset to cycles_max.
struct OpDataCircularBuffer {
  int cycles_until_run;
  int cycles_max;
};

TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_
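
A minimal sketch (not part of the removed sources) of the countdown semantics
the OpDataCircularBuffer comment describes; the real Eval lives in the
per-target circular_buffer.cc and assumes the header above is included:

// Returns kTfLiteAbort until the op has been invoked cycles_max times,
// then fires once and rearms the counter.
TfLiteStatus EvalStrideSketch(tflite::OpDataCircularBuffer* op_data) {
  if (--op_data->cycles_until_run != 0) {
    return tflite::kTfLiteAbort;  // downstream ops are skipped this round
  }
  op_data->cycles_until_run = op_data->cycles_max;
  return kTfLiteOk;
}
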
@@ -1,97 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/kernels/circular_buffer.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {

// The CircularBuffer op has one input and one output tensor.
const int kCircularBufferInputTensor = 0;
const int kCircularBufferOutputTensor = 0;

// Indices into the init flexbuffer's vector.
// The parameter's name is in the comment that follows.
// Elements in the vectors are ordered alphabetically by parameter name.
const int kCircularBufferCyclesMaxIndex = 0;  // 'cycles_max'

// TODO(b/149795762): Add this to TfLiteStatus enum.
const TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);

TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kCircularBufferInputTensor);
  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
      node, kCircularBufferOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpDataCircularBuffer* op_data =
      static_cast<OpDataCircularBuffer*>(node->user_data);

  TF_LITE_ENSURE(context, input != nullptr);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]);
  TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
  TF_LITE_ENSURE_EQ(context, input->dims->data[2], output->dims->data[2]);
  TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);

  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  // The circular buffer custom operator currently only supports int8.
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);

  if (op_data->cycles_max <= 0) {
    // The last circular buffer layer simply accumulates outputs, and does not
    // run periodically.
    // TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
    static int cb_prepare_count = 0;
    cb_prepare_count++;
    // These checks specifically work for the only two streaming models
    // supported on TFLM. They use the shape of the output tensor along with the
    // layer number to determine if the circular buffer period should be 1 or 2.

    // These models are outlined in the following documents:
    // https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
    // https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
    if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
        output->dims->data[1] == 25 ||
        (cb_prepare_count == 5 && output->dims->data[2] == 2 &&
         output->dims->data[3] == 96)) {
      op_data->cycles_max = 1;
      cb_prepare_count = 0;
    } else {
      op_data->cycles_max = 2;
    }
  }
  op_data->cycles_until_run = op_data->cycles_max;
  node->user_data = op_data;

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

}  // namespace tflite
@@ -1,22 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H
#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H

extern const int g_gen_data_size_circular_buffer_config;
extern const unsigned char g_gen_data_circular_buffer_config[];

#endif
@@ -1,141 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/conv.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {
namespace {

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kConvInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 3)
          ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
          : nullptr;
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);

  TFLITE_DCHECK(node->builtin_data != nullptr);
  const auto& params =
      *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
  TFLITE_DCHECK(node->user_data != nullptr);
  const auto& data = *(static_cast<const OpDataConv*>(node->user_data));

  TF_LITE_ENSURE_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_MSG(
      context,
      input->type == filter->type ||
          (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
      "Hybrid models are not supported on TFLite Micro.");

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32: {
      tflite::reference_ops::Conv(
          ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<float>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetOptionalTensorData<float>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output),
          tflite::micro::GetTensorShape(nullptr), nullptr);
      break;
    }
    case kTfLiteInt16: {
      switch (bias->type) {
        case kTfLiteInt32: {
          reference_integer_ops::ConvPerChannel(
              ConvParamsQuantized(params, data),
              data.per_channel_output_multiplier, data.per_channel_output_shift,
              tflite::micro::GetTensorShape(input),
              tflite::micro::GetTensorData<int16_t>(input),
              tflite::micro::GetTensorShape(filter),
              tflite::micro::GetTensorData<int8_t>(filter),
              tflite::micro::GetTensorShape(bias),
              tflite::micro::GetOptionalTensorData<std::int32_t>(bias),
              tflite::micro::GetTensorShape(output),
              tflite::micro::GetTensorData<int16_t>(output));
          break;
        }
        case kTfLiteInt64: {
          reference_integer_ops::ConvPerChannel(
              ConvParamsQuantized(params, data),
              data.per_channel_output_multiplier, data.per_channel_output_shift,
              tflite::micro::GetTensorShape(input),
              tflite::micro::GetTensorData<int16_t>(input),
              tflite::micro::GetTensorShape(filter),
              tflite::micro::GetTensorData<int8_t>(filter),
              tflite::micro::GetTensorShape(bias),
              tflite::micro::GetOptionalTensorData<std::int64_t>(bias),
              tflite::micro::GetTensorShape(output),
              tflite::micro::GetTensorData<int16_t>(output));
          break;
        }
        default:
          MicroPrintf("Bias type %s (%d) not supported.",
                      TfLiteTypeGetName(bias->type), bias->type);
          return kTfLiteError;
      }
      break;
    }
    case kTfLiteInt8: {
      reference_integer_ops::ConvPerChannel(
          ConvParamsQuantized(params, data), data.per_channel_output_multiplier,
          data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<int8_t>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetOptionalTensorData<int32_t>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
      break;
    }
    default:
      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
                  input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_CONV_2D() {
  return tflite::micro::RegisterOp(Init, ConvPrepare, Eval);
}

}  // namespace tflite
@@ -1,112 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

struct OpDataConv {
  TfLitePaddingValues padding;

  // Cached tensor zero point values for quantized operations.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;

  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;

  // Per channel output multiplier and shift.
  int32_t* per_channel_output_multiplier;
  int32_t* per_channel_output_shift;

  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
};

extern const int kConvInputTensor;
extern const int kConvWeightsTensor;
extern const int kConvBiasTensor;
extern const int kConvOutputTensor;
extern const int kConvQuantizedDimension;

// Returns a ConvParams struct with all the parameters needed for a
// float computation.
ConvParams ConvParamsFloat(const TfLiteConvParams& params,
                           const OpDataConv& data);

// Returns a ConvParams struct with all the parameters needed for a
// quantized computation.
ConvParams ConvParamsQuantized(const TfLiteConvParams& params,
                               const OpDataConv& data);

TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
                                 const TfLiteConvParams& params, int width,
                                 int height, int filter_width,
                                 int filter_height, int out_width,
                                 int out_height, const TfLiteType data_type,
                                 OpDataConv* data);

TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_CONV_2D();

#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 activations and int8 weights and always calls the reference
// implementation.
TfLiteRegistration Register_CONV_2D_INT8REF();
#else
inline TfLiteRegistration Register_CONV_2D_INT8REF() {
  return Register_CONV_2D();
}
#endif

#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_CONV_2D_INT8();

// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_CONV_2D_INT16();

#else
inline TfLiteRegistration Register_CONV_2D_INT8() { return Register_CONV_2D(); }

inline TfLiteRegistration Register_CONV_2D_INT16() {
  return Register_CONV_2D();
}
#endif

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
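
A sketch of how the output_multiplier/output_shift pair in OpDataConv is
applied at requantization time, using the rounding helper from
kernels/internal/common.h (assumed available in this tree):

#include <cstdint>

#include "tensorflow/lite/kernels/internal/common.h"

// Scales a raw int32 accumulator into the output's quantized domain:
// acc * real_multiplier, with real_multiplier represented as the Q31
// fixed-point value output_multiplier times 2^output_shift.
int32_t RequantizeSketch(int32_t acc, int32_t output_multiplier,
                         int output_shift) {
  return tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier,
                                               output_shift);
}
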
@@ -1,197 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/conv.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {

const int kConvInputTensor = 0;
const int kConvWeightsTensor = 1;
const int kConvBiasTensor = 2;
const int kConvOutputTensor = 0;

// Conv is quantized along dimension 0:
// https://www.tensorflow.org/lite/performance/quantization_spec
const int kConvQuantizedDimension = 0;

// Returns a ConvParams struct with all the parameters needed for a
// float computation.
ConvParams ConvParamsFloat(const TfLiteConvParams& params,
                           const OpDataConv& data) {
  ConvParams op_params;
  CalculateActivationRange(params.activation, &op_params.float_activation_min,
                           &op_params.float_activation_max);
  op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params.stride_width;
  op_params.stride_height = params.stride_height;
  op_params.dilation_width_factor = params.dilation_width_factor;
  op_params.dilation_height_factor = params.dilation_height_factor;
  return op_params;
}

// Returns a ConvParams struct with all the parameters needed for a
// quantized computation.
ConvParams ConvParamsQuantized(const TfLiteConvParams& params,
                               const OpDataConv& data) {
  ConvParams op_params;
  op_params.input_offset = -data.input_zero_point;
  op_params.weights_offset = -data.filter_zero_point;
  op_params.output_offset = data.output_zero_point;
  op_params.output_multiplier = data.output_multiplier;
  op_params.output_shift = -data.output_shift;
  op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
  op_params.padding_values.height = data.padding.height;
  op_params.padding_values.width = data.padding.width;
  op_params.stride_height = params.stride_height;
  op_params.stride_width = params.stride_width;
  op_params.dilation_height_factor = params.dilation_height_factor;
  op_params.dilation_width_factor = params.dilation_width_factor;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;
  return op_params;
}

TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
                                 const TfLiteConvParams& params, int width,
                                 int height, int filter_width,
                                 int filter_height, int out_width,
                                 int out_height, const TfLiteType data_type,
                                 OpDataConv* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params.padding;
  data->padding = ComputePaddingHeightWidth(
      params.stride_height, params.stride_width, params.dilation_height_factor,
      params.dilation_width_factor, height, width, filter_height, filter_width,
      padding, &out_height, &out_width);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* filter =
      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);
  TfLiteTensor* bias =
      micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    int output_channels = filter->dims->data[kConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params.activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier, data->per_channel_output_shift,
        output_channels));
  }

  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(filter);
  micro_context->DeallocateTempTfLiteTensor(output);
  micro_context->DeallocateTempTfLiteTensor(bias);

  return kTfLiteOk;
}

TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
  const auto& params =
      *(static_cast<const TfLiteConvParams*>(node->builtin_data));
  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* filter =
      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);

  const int input_width = input->dims->data[2];
  const int input_height = input->dims->data[1];
  const int filter_width = filter->dims->data[2];
  const int filter_height = filter->dims->data[1];
  const int output_width = output->dims->data[2];
  const int output_height = output->dims->data[1];

  // Dynamically allocate per-channel quantization parameters.
  const int num_channels = filter->dims->data[kConvQuantizedDimension];
  data->per_channel_output_multiplier =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
    TFLITE_DCHECK(affine_quantization != nullptr);
    TFLITE_DCHECK(affine_quantization->scale != nullptr);
    TFLITE_DCHECK(affine_quantization->zero_point != nullptr);

    TF_LITE_ENSURE(context,
                   affine_quantization->scale->size == 1 ||
                       affine_quantization->scale->size ==
                           filter->dims->data[kConvQuantizedDimension]);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpDataConv(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, data));

  micro_context->DeallocateTempTfLiteTensor(filter);
  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}
}  // namespace tflite
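
The sign conventions in ConvParamsQuantized above are easy to misread: the
zero points are stored negated so the reference kernels can add them. A
one-line illustration (not from the removed file):

#include <cstdint>

// The kernels compute (q + offset); with offset = -zero_point this equals
// the real-valued term (q - zero_point).
int32_t CenteredInputSketch(int8_t q, int32_t input_offset) {
  return static_cast<int32_t>(q) + input_offset;
}
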
@@ -1,113 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/kernels/micro_ops.h"
#include "tensorflow/lite/micro/test_helpers.h"
#include "tensorflow/lite/micro/testing/micro_test.h"

namespace tflite {
namespace testing {

TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
                        int output_length, TfLiteConvParams* conv_params,
                        TfLiteRegistration registration, float* output_data);

TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
                        int output_length, TfLiteConvParams* conv_params,
                        TfLiteRegistration registration, int8_t* output_data);

TfLiteStatus InvokeConv(TfLiteTensor* tensors, int tensors_size,
                        int output_length, TfLiteConvParams* conv_params,
                        TfLiteRegistration registration, uint8_t* output_data);

TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
                                 const float* expected_output_data,
                                 int output_length,
                                 TfLiteConvParams* conv_params,
                                 TfLiteRegistration registration,
                                 float* output_data, float tolerance = 1e-5);

TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
                                 const int8_t* expected_output_data,
                                 int output_length,
                                 TfLiteConvParams* conv_params,
                                 TfLiteRegistration registration,
                                 int8_t* output_data, float tolerance = 1e-5);

TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
                                 const uint8_t* expected_output_data,
                                 int output_length,
                                 TfLiteConvParams* conv_params,
                                 TfLiteRegistration registration,
                                 uint8_t* output_data, float tolerance = 1e-5);

TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data,
                           int* filter_dims_data, const float* filter_data,
                           int* bias_dims_data, const float* bias_data,
                           int* output_dims_data,
                           const float* expected_output_data,
                           TfLiteConvParams* conv_params,
                           TfLiteRegistration registration, float* output_data);

TfLiteStatus TestConvQuantizedPerLayer(
    int* input_dims_data, const float* input_data, uint8_t* input_quantized,
    float input_scale, int* filter_dims_data, const float* filter_data,
    uint8_t* filter_quantized, float filter_scale, int* bias_dims_data,
    const float* bias_data, int32_t* bias_quantized, int* output_dims_data,
    const float* expected_output_data, uint8_t* expected_output_quantized,
    float output_scale, TfLiteConvParams* conv_params,
    TfLiteRegistration registration, uint8_t* output_data);

TfLiteStatus TestConvQuantizedPerChannel(
    int* input_dims_data, const float* input_data, int8_t* input_quantized,
    float input_scale, int input_zero_point, int* filter_dims_data,
    const float* filter_data, int8_t* filter_data_quantized,
    int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
    float* bias_scales, int* bias_zero_points, int* output_dims_data,
    const float* expected_output_data, int8_t* expected_output_data_quantized,
    float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
    TfLiteRegistration registration, int8_t* output_data);

TfLiteStatus TestConvQuantizedPerChannel(
    int* input_dims_data, const float* input_data, int16_t* input_quantized,
    float input_scale, int input_zero_point, int* filter_dims_data,
    const float* filter_data, int8_t* filter_data_quantized,
    int* bias_dims_data, const float* bias_data,
    std::int64_t* bias_data_quantized, float* bias_scales,
    int* bias_zero_points, int* output_dims_data,
    const float* expected_output_data, int16_t* expected_output_data_quantized,
    float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
    TfLiteRegistration registration, int16_t* output_data);

TfLiteStatus TestConvQuantizedPerChannel(
    int* input_dims_data, const float* input_data, int16_t* input_quantized,
    float input_scale, int input_zero_point, int* filter_dims_data,
    const float* filter_data, int8_t* filter_data_quantized,
    int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
    float* bias_scales, int* bias_zero_points, int* output_dims_data,
    const float* expected_output_data, int16_t* expected_output_data_quantized,
    float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
    TfLiteRegistration registration, int16_t* output_data);

}  // namespace testing
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
@@ -1,80 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/kernels/conv.h"

namespace tflite {

extern const int kDepthwiseConvInputTensor;
extern const int kDepthwiseConvWeightsTensor;
extern const int kDepthwiseConvBiasTensor;
extern const int kDepthwiseConvOutputTensor;
extern const int kDepthwiseConvQuantizedDimension;

// Returns a DepthwiseParams struct with all the parameters needed for a
// float computation.
DepthwiseParams DepthwiseConvParamsFloat(
    const TfLiteDepthwiseConvParams& params, const OpDataConv& data);

// Returns a DepthwiseParams struct with all the parameters needed for a
// quantized computation.
DepthwiseParams DepthwiseConvParamsQuantized(
    const TfLiteDepthwiseConvParams& params, const OpDataConv& data);

TfLiteStatus CalculateOpDataDepthwiseConv(
    TfLiteContext* context, TfLiteNode* node,
    const TfLiteDepthwiseConvParams& params, int width, int height,
    int filter_width, int filter_height, int out_width, int out_height,
    const TfLiteType data_type, OpDataConv* data);

TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node);

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_DEPTHWISE_CONV_2D();

#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8();

// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16 activations and int8 weights and uses the latency optimized
// implementations.
TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16();

#else
inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8() {
  return Register_DEPTHWISE_CONV_2D();
}

inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16() {
  return Register_DEPTHWISE_CONV_2D();
}
#endif

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_
@@ -1,202 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/depthwise_conv.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {

const int kDepthwiseConvInputTensor = 0;
const int kDepthwiseConvWeightsTensor = 1;
const int kDepthwiseConvBiasTensor = 2;
const int kDepthwiseConvOutputTensor = 0;

// DepthwiseConv is quantized along dimension 3:
// https://www.tensorflow.org/lite/performance/quantization_spec
const int kDepthwiseConvQuantizedDimension = 3;

// Returns a DepthwiseParams struct with all the parameters needed for a
// float computation.
DepthwiseParams DepthwiseConvParamsFloat(
    const TfLiteDepthwiseConvParams& params, const OpDataConv& data) {
  DepthwiseParams op_params;
  CalculateActivationRange(params.activation, &op_params.float_activation_min,
                           &op_params.float_activation_max);
  op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
  op_params.padding_values.width = data.padding.width;
  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params.stride_width;
  op_params.stride_height = params.stride_height;
  op_params.dilation_width_factor = params.dilation_width_factor;
  op_params.dilation_height_factor = params.dilation_height_factor;
  op_params.depth_multiplier = params.depth_multiplier;
  return op_params;
}

// Returns a DepthwiseParams struct with all the parameters needed for a
// quantized computation.
DepthwiseParams DepthwiseConvParamsQuantized(
    const TfLiteDepthwiseConvParams& params, const OpDataConv& data) {
  DepthwiseParams op_params;
  op_params.input_offset = -data.input_zero_point;
  op_params.weights_offset = -data.filter_zero_point;
  op_params.output_offset = data.output_zero_point;
  op_params.output_multiplier = data.output_multiplier;
  op_params.output_shift = -data.output_shift;
  op_params.padding_type = tflite::micro::RuntimePaddingType(params.padding);
  op_params.padding_values.height = data.padding.height;
  op_params.padding_values.width = data.padding.width;
  op_params.stride_height = params.stride_height;
  op_params.stride_width = params.stride_width;
  op_params.dilation_height_factor = params.dilation_height_factor;
  op_params.dilation_width_factor = params.dilation_width_factor;
  op_params.depth_multiplier = params.depth_multiplier;
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;
  return op_params;
}

TfLiteStatus CalculateOpDataDepthwiseConv(
    TfLiteContext* context, TfLiteNode* node,
    const TfLiteDepthwiseConvParams& params, int width, int height,
    int filter_width, int filter_height, int out_width, int out_height,
    const TfLiteType data_type, OpDataConv* data) {
  bool has_bias = node->inputs->size == 3;
  // Check number of inputs/outputs
  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params.padding;
  data->padding = ComputePaddingHeightWidth(
      params.stride_height, params.stride_width, params.dilation_height_factor,
      params.dilation_width_factor, height, width, filter_height, filter_width,
      padding, &out_height, &out_width);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* filter =
      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);
  TfLiteTensor* bias =
      micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training.
  if (data_type != kTfLiteFloat32) {
    int output_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];

    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params.activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier, data->per_channel_output_shift,
        output_channels));
  }

  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(filter);
  micro_context->DeallocateTempTfLiteTensor(bias);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
  const auto& params =
      *(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* filter =
      micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
  TF_LITE_ENSURE(context, filter != nullptr);

  const int input_width = input->dims->data[2];
  const int input_height = input->dims->data[1];
  const int filter_width = filter->dims->data[2];
  const int filter_height = filter->dims->data[1];
  const int output_width = output->dims->data[2];
  const int output_height = output->dims->data[1];

  // Dynamically allocate per-channel quantization parameters.
  const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
  data->per_channel_output_multiplier =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));
  data->per_channel_output_shift =
      static_cast<int32_t*>(context->AllocatePersistentBuffer(
          context, num_channels * sizeof(int32_t)));

  // All per-channel quantized tensors need valid zero point and scale arrays.
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);

    const auto* affine_quantization =
        static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
    TFLITE_DCHECK(affine_quantization != nullptr);
    TFLITE_DCHECK(affine_quantization->scale != nullptr);
    TFLITE_DCHECK(affine_quantization->zero_point != nullptr);

    TF_LITE_ENSURE(
        context, affine_quantization->scale->size == 1 ||
                     affine_quantization->scale->size ==
                         filter->dims->data[kDepthwiseConvQuantizedDimension]);

    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                      affine_quantization->zero_point->size);
  }

  TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv(
      context, node, params, input_width, input_height, filter_width,
      filter_height, output_width, output_height, input->type, data));

  micro_context->DeallocateTempTfLiteTensor(output);
  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(filter);

  return kTfLiteOk;
}

}  // namespace tflite
@@ -1,88 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/dequantize.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/dequantize.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {

void* DequantizeInit(TfLiteContext* context, const char* buffer,
                     size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(DequantizeOpData));
}

TfLiteStatus DequantizeEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  DequantizeOpData* data = static_cast<DequantizeOpData*>(node->user_data);

  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

  if (output->type == kTfLiteFloat32) {
    switch (input->type) {
      case kTfLiteInt8:
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<int8_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      case kTfLiteInt16:
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<int16_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      case kTfLiteUInt8:
        reference_ops::Dequantize(data->quantization_params,
                                  tflite::micro::GetTensorShape(input),
                                  tflite::micro::GetTensorData<uint8_t>(input),
                                  tflite::micro::GetTensorShape(output),
                                  tflite::micro::GetTensorData<float>(output));
        break;
      default:
        MicroPrintf("Input %s, output %s not supported.",
                    TfLiteTypeGetName(input->type),
                    TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else {
    MicroPrintf("Input %s, output %s not supported.",
                TfLiteTypeGetName(input->type),
                TfLiteTypeGetName(output->type));
    return kTfLiteError;
  }

  return kTfLiteOk;
}

TfLiteRegistration Register_DEQUANTIZE() {
  return tflite::micro::RegisterOp(DequantizeInit, DequantizePrepare,
                                   DequantizeEval);
}

} // namespace tflite
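The kernel above applies the standard affine mapping real = scale * (quantized - zero_point) via reference_ops::Dequantize. A minimal standalone sketch of that arithmetic, assuming example scale and zero-point values:

```
#include <cstdint>
#include <cstdio>

int main() {
  const float scale = 0.5f;   // assumed quantization scale
  const int zero_point = -1;  // assumed zero point
  const int8_t quantized[3] = {-1, 3, 7};
  for (int i = 0; i < 3; ++i) {
    // real_value = scale * (quantized_value - zero_point)
    const float real = scale * (static_cast<float>(quantized[i]) - zero_point);
    std::printf("%d -> %.1f\n", quantized[i], real);  // prints 0.0, 2.0, 4.0
  }
  return 0;
}
```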
@@ -1,38 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

struct DequantizeOpData {
  tflite::DequantizationParams quantization_params;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  int32_t output_zero_point;
};

TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node);

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_
@@ -1,67 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/dequantize.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {

TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  DequantizeOpData* data = static_cast<DequantizeOpData*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  MicroContext* micro_context = GetMicroContext(context);

  // TODO(b/140515557): Add cached dequant to improve hybrid model performance.
  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
                              input->type == kTfLiteInt16 ||
                              input->type == kTfLiteUInt8);
  TF_LITE_ENSURE(context, output->type == kTfLiteFloat32);

  if (output->type == kTfLiteInt32) {
    const double effective_output_scale =
        static_cast<double>(input->params.scale) /
        static_cast<double>(output->params.scale);
    QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
                       &data->output_shift);
  }

  data->quantization_params.zero_point = input->params.zero_point;
  data->quantization_params.scale = static_cast<double>(input->params.scale);
  data->output_zero_point = output->params.zero_point;

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

} // namespace tflite
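DequantizePrepare turns the effective scale input_scale / output_scale into a fixed-point multiplier plus a shift via QuantizeMultiplier. A minimal sketch of that decomposition, assuming the usual Q31 representation (an illustration, not the library routine):

```
#include <cmath>
#include <cstdint>

// Represent scale ~= (multiplier / 2^31) * 2^shift, with multiplier in Q31.
void DecomposeScale(double scale, int32_t* multiplier, int* shift) {
  if (scale == 0.0) { *multiplier = 0; *shift = 0; return; }
  const double q = std::frexp(scale, shift);  // scale = q * 2^shift, q in [0.5, 1)
  int64_t m = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (m == (1LL << 31)) {  // rounding may push q to exactly 1.0
    m /= 2;
    ++(*shift);
  }
  *multiplier = static_cast<int32_t>(m);
}
```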
@@ -1,807 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <algorithm>
#include <cmath>
#include <cstring>
#include <numeric>
#include <tuple>

#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace {

/**
 * This version of detection_postprocess is specific to TFLite Micro. It
 * differs from the TFLite version in the following ways:
 *
 * 1.) Temporaries (temporary tensors) - Micro uses the scratch buffer API
 *     instead.
 * 2.) Output dimensions - the TFLite version does not support undefined
 *     output dimensions, so the model must have static output dimensions.
 */

// Input tensors
constexpr int kInputTensorBoxEncodings = 0;
constexpr int kInputTensorClassPredictions = 1;
constexpr int kInputTensorAnchors = 2;

// Output tensors
constexpr int kOutputTensorDetectionBoxes = 0;
constexpr int kOutputTensorDetectionClasses = 1;
constexpr int kOutputTensorDetectionScores = 2;
constexpr int kOutputTensorNumDetections = 3;

constexpr int kNumCoordBox = 4;
constexpr int kBatchSize = 1;

constexpr int kNumDetectionsPerClass = 100;

// The object detection model produces axis-aligned boxes in two formats:
// BoxCorner represents the lower-left corner (xmin, ymin) and
// the upper-right corner (xmax, ymax).
// CenterSize represents the center (xcenter, ycenter), height and width.
// BoxCornerEncoding and CenterSizeEncoding are related as follows:
//   ycenter = y / y_scale * anchor.h + anchor.y;
//   xcenter = x / x_scale * anchor.w + anchor.x;
//   half_h = 0.5 * exp(h / h_scale) * anchor.h;
//   half_w = 0.5 * exp(w / w_scale) * anchor.w;
//   ymin = ycenter - half_h
//   ymax = ycenter + half_h
//   xmin = xcenter - half_w
//   xmax = xcenter + half_w
struct BoxCornerEncoding {
  float ymin;
  float xmin;
  float ymax;
  float xmax;
};

struct CenterSizeEncoding {
  float y;
  float x;
  float h;
  float w;
};
// We make sure that the memory allocations are contiguous with static_assert.
static_assert(sizeof(BoxCornerEncoding) == sizeof(float) * kNumCoordBox,
              "Size of BoxCornerEncoding is 4 float values");
static_assert(sizeof(CenterSizeEncoding) == sizeof(float) * kNumCoordBox,
              "Size of CenterSizeEncoding is 4 float values");

struct OpData {
  int max_detections;
  int max_classes_per_detection;  // Fast Non-Max-Suppression
  int detections_per_class;       // Regular Non-Max-Suppression
  float non_max_suppression_score_threshold;
  float intersection_over_union_threshold;
  int num_classes;
  bool use_regular_non_max_suppression;
  CenterSizeEncoding scale_values;

  // Scratch buffer indexes
  int active_candidate_idx;
  int decoded_boxes_idx;
  int scores_idx;
  int score_buffer_idx;
  int keep_scores_idx;
  int scores_after_regular_non_max_suppression_idx;
  int sorted_values_idx;
  int keep_indices_idx;
  int sorted_indices_idx;
  int buffer_idx;
  int selected_idx;

  // Cached tensor scale and zero point values for quantized operations
  TfLiteQuantizationParams input_box_encodings;
  TfLiteQuantizationParams input_class_predictions;
  TfLiteQuantizationParams input_anchors;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  OpData* op_data = nullptr;

  const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
  const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
  op_data = reinterpret_cast<OpData*>(
      context->AllocatePersistentBuffer(context, sizeof(OpData)));

  op_data->max_detections = m["max_detections"].AsInt32();
  op_data->max_classes_per_detection = m["max_classes_per_detection"].AsInt32();
  if (m["detections_per_class"].IsNull())
    op_data->detections_per_class = kNumDetectionsPerClass;
  else
    op_data->detections_per_class = m["detections_per_class"].AsInt32();
  if (m["use_regular_nms"].IsNull())
    op_data->use_regular_non_max_suppression = false;
  else
    op_data->use_regular_non_max_suppression = m["use_regular_nms"].AsBool();

  op_data->non_max_suppression_score_threshold =
      m["nms_score_threshold"].AsFloat();
  op_data->intersection_over_union_threshold = m["nms_iou_threshold"].AsFloat();
  op_data->num_classes = m["num_classes"].AsInt32();
  op_data->scale_values.y = m["y_scale"].AsFloat();
  op_data->scale_values.x = m["x_scale"].AsFloat();
  op_data->scale_values.h = m["h_scale"].AsFloat();
  op_data->scale_values.w = m["w_scale"].AsFloat();

  return op_data;
}
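// A sketch (commented out; all option values are assumptions) of how the
// option map read by Init() above could be produced with the FlexBuffers API,
// using the same keys Init() parses:
//
//   flexbuffers::Builder fbb;
//   fbb.Map([&]() {
//     fbb.Int("max_detections", 10);
//     fbb.Int("max_classes_per_detection", 1);
//     fbb.Int("num_classes", 90);
//     fbb.Float("nms_score_threshold", 0.5f);
//     fbb.Float("nms_iou_threshold", 0.6f);
//     fbb.Float("y_scale", 10.0f);
//     fbb.Float("x_scale", 10.0f);
//     fbb.Float("h_scale", 5.0f);
//     fbb.Float("w_scale", 5.0f);
//     fbb.Bool("use_regular_nms", false);
//   });
//   fbb.Finish();
//   const std::vector<uint8_t>& options = fbb.GetBuffer();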

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* op_data = static_cast<OpData*>(node->user_data);

  MicroContext* micro_context = GetMicroContext(context);

  // Inputs: box_encodings, scores, anchors
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
  TfLiteTensor* input_box_encodings =
      micro_context->AllocateTempInputTensor(node, kInputTensorBoxEncodings);
  TfLiteTensor* input_class_predictions =
      micro_context->AllocateTempInputTensor(node,
                                             kInputTensorClassPredictions);
  TfLiteTensor* input_anchors =
      micro_context->AllocateTempInputTensor(node, kInputTensorAnchors);
  TF_LITE_ENSURE_EQ(context, NumDimensions(input_box_encodings), 3);
  TF_LITE_ENSURE_EQ(context, NumDimensions(input_class_predictions), 3);
  TF_LITE_ENSURE_EQ(context, NumDimensions(input_anchors), 2);

  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);
  const int num_boxes = input_box_encodings->dims->data[1];
  const int num_classes = op_data->num_classes;

  op_data->input_box_encodings.scale = input_box_encodings->params.scale;
  op_data->input_box_encodings.zero_point =
      input_box_encodings->params.zero_point;
  op_data->input_class_predictions.scale =
      input_class_predictions->params.scale;
  op_data->input_class_predictions.zero_point =
      input_class_predictions->params.zero_point;
  op_data->input_anchors.scale = input_anchors->params.scale;
  op_data->input_anchors.zero_point = input_anchors->params.zero_point;

  // Scratch tensors
  context->RequestScratchBufferInArena(context, num_boxes,
                                       &op_data->active_candidate_idx);
  context->RequestScratchBufferInArena(context,
                                       num_boxes * kNumCoordBox * sizeof(float),
                                       &op_data->decoded_boxes_idx);
  context->RequestScratchBufferInArena(
      context,
      input_class_predictions->dims->data[1] *
          input_class_predictions->dims->data[2] * sizeof(float),
      &op_data->scores_idx);

  // Additional buffers
  context->RequestScratchBufferInArena(context, num_boxes * sizeof(float),
                                       &op_data->score_buffer_idx);
  context->RequestScratchBufferInArena(context, num_boxes * sizeof(float),
                                       &op_data->keep_scores_idx);
  context->RequestScratchBufferInArena(
      context, op_data->max_detections * num_boxes * sizeof(float),
      &op_data->scores_after_regular_non_max_suppression_idx);
  context->RequestScratchBufferInArena(
      context, op_data->max_detections * num_boxes * sizeof(float),
      &op_data->sorted_values_idx);
  context->RequestScratchBufferInArena(context, num_boxes * sizeof(int),
                                       &op_data->keep_indices_idx);
  context->RequestScratchBufferInArena(
      context, op_data->max_detections * num_boxes * sizeof(int),
      &op_data->sorted_indices_idx);
  int buffer_size = std::max(num_classes, op_data->max_detections);
  context->RequestScratchBufferInArena(
      context, buffer_size * num_boxes * sizeof(int), &op_data->buffer_idx);
  buffer_size = std::min(num_boxes, op_data->max_detections);
  context->RequestScratchBufferInArena(
      context, buffer_size * num_boxes * sizeof(int), &op_data->selected_idx);

  // Outputs: detection_boxes, detection_scores, detection_classes,
  // num_detections
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);

  micro_context->DeallocateTempTfLiteTensor(input_box_encodings);
  micro_context->DeallocateTempTfLiteTensor(input_class_predictions);
  micro_context->DeallocateTempTfLiteTensor(input_anchors);

  return kTfLiteOk;
}

class Dequantizer {
 public:
  Dequantizer(int zero_point, float scale)
      : zero_point_(zero_point), scale_(scale) {}
  float operator()(uint8_t x) {
    return (static_cast<float>(x) - zero_point_) * scale_;
  }

 private:
  int zero_point_;
  float scale_;
};

template <class T>
T ReInterpretTensor(const TfLiteEvalTensor* tensor) {
  const float* tensor_base = tflite::micro::GetTensorData<float>(tensor);
  return reinterpret_cast<T>(tensor_base);
}

template <class T>
T ReInterpretTensor(TfLiteEvalTensor* tensor) {
  float* tensor_base = tflite::micro::GetTensorData<float>(tensor);
  return reinterpret_cast<T>(tensor_base);
}

TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node,
                                   OpData* op_data) {
  // Parse the input tensor box encodings
  const TfLiteEvalTensor* input_box_encodings =
      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
  TF_LITE_ENSURE_EQ(context, input_box_encodings->dims->data[0], kBatchSize);
  const int num_boxes = input_box_encodings->dims->data[1];
  TF_LITE_ENSURE(context, input_box_encodings->dims->data[2] >= kNumCoordBox);
  const TfLiteEvalTensor* input_anchors =
      tflite::micro::GetEvalInput(context, node, kInputTensorAnchors);

  // Decode the boxes to get (ymin, xmin, ymax, xmax) based on the anchors
  CenterSizeEncoding box_centersize;
  CenterSizeEncoding scale_values = op_data->scale_values;
  CenterSizeEncoding anchor;
  for (int idx = 0; idx < num_boxes; ++idx) {
    switch (input_box_encodings->type) {
      // Float
      case kTfLiteFloat32: {
        // Please see the DequantizeBoxEncodings function for details on the
        // supported types.
        const int box_encoding_idx = idx * input_box_encodings->dims->data[2];
        const float* boxes = &(tflite::micro::GetTensorData<float>(
            input_box_encodings)[box_encoding_idx]);
        box_centersize = *reinterpret_cast<const CenterSizeEncoding*>(boxes);
        anchor =
            ReInterpretTensor<const CenterSizeEncoding*>(input_anchors)[idx];
        break;
      }
      default:
        // Unsupported type.
        return kTfLiteError;
    }

    float ycenter = static_cast<float>(static_cast<double>(box_centersize.y) /
                                           static_cast<double>(scale_values.y) *
                                           static_cast<double>(anchor.h) +
                                       static_cast<double>(anchor.y));

    float xcenter = static_cast<float>(static_cast<double>(box_centersize.x) /
                                           static_cast<double>(scale_values.x) *
                                           static_cast<double>(anchor.w) +
                                       static_cast<double>(anchor.x));

    float half_h =
        static_cast<float>(0.5 *
                           (std::exp(static_cast<double>(box_centersize.h) /
                                     static_cast<double>(scale_values.h))) *
                           static_cast<double>(anchor.h));
    float half_w =
        static_cast<float>(0.5 *
                           (std::exp(static_cast<double>(box_centersize.w) /
                                     static_cast<double>(scale_values.w))) *
                           static_cast<double>(anchor.w));

    float* decoded_boxes = reinterpret_cast<float*>(
        context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
    auto& box = reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[idx];
    box.ymin = ycenter - half_h;
    box.xmin = xcenter - half_w;
    box.ymax = ycenter + half_h;
    box.xmax = xcenter + half_w;
  }
  return kTfLiteOk;
}
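// A worked example of the decode above, with assumed values: anchor
// (y=0.5, x=0.5, h=0.2, w=0.2), encoding (y=1.0, x=0.0, h=0.0, w=0.0) and
// scales (y_scale=10, x_scale=10, h_scale=5, w_scale=5):
//   ycenter = 1.0 / 10 * 0.2 + 0.5 = 0.52
//   xcenter = 0.0 / 10 * 0.2 + 0.5 = 0.50
//   half_h  = 0.5 * exp(0.0 / 5) * 0.2 = 0.1
//   half_w  = 0.5 * exp(0.0 / 5) * 0.2 = 0.1
// giving the corner box (ymin, xmin, ymax, xmax) = (0.42, 0.40, 0.62, 0.60).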

void DecreasingPartialArgSort(const float* values, int num_values,
                              int num_to_sort, int* indices) {
  std::iota(indices, indices + num_values, 0);
  std::partial_sort(indices, indices + num_to_sort, indices + num_values,
                    [&values](const int i, const int j) {
                      return std::tie(values[i], j) > std::tie(values[j], i);
                    });
}
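// Usage sketch with assumed values: after the call, the first num_to_sort
// entries of indices reference the largest scores in decreasing order; the
// std::tie comparator above breaks ties toward the lower index.
//
//   float scores[5] = {0.1f, 0.9f, 0.4f, 0.9f, 0.2f};
//   int indices[5];
//   DecreasingPartialArgSort(scores, 5, 3, indices);
//   // indices[0..2] == {1, 3, 2}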

template <typename Compare>
void InsertionSort(int* start, int* end, Compare compare) {
  for (int* i = start; i != end; ++i) {
    std::rotate(std::upper_bound(start, i, *i, compare), i, i + 1);
  }
}

template <typename Compare>
void TopDownMerge(int* values, int* scratch, const int half_num_values,
                  int num_values, Compare compare) {
  int left = 0;
  int right = half_num_values;

  for (int i = 0; i < num_values; i++) {
    if (left >= half_num_values ||
        (right < num_values && compare(values[right], values[left]))) {
      scratch[i] = values[right++];
    } else {
      scratch[i] = values[left++];
    }
  }
  memcpy(values, scratch, num_values * sizeof(int));
}

template <typename Compare>
void MergeSort(int* values, int* scratch, const int num_values,
               Compare compare) {
  constexpr int threshold = 20;

  if (num_values < threshold) {
    InsertionSort(values, values + num_values, compare);
    return;
  }

  const int half_num_values = num_values / 2;

  MergeSort(values, scratch, half_num_values, compare);
  MergeSort(values + half_num_values, scratch, num_values - half_num_values,
            compare);
  TopDownMerge(values, scratch, half_num_values, num_values, compare);
}

void DecreasingArgSort(const float* values, int num_values, int* indices,
                       int* scratch) {
  std::iota(indices, indices + num_values, 0);

  MergeSort(indices, scratch, num_values, [&values](const int i, const int j) {
    return values[i] > values[j];
  });
}

int SelectDetectionsAboveScoreThreshold(const float* values, int size,
                                        const float threshold,
                                        float* keep_values, int* keep_indices) {
  int counter = 0;
  for (int i = 0; i < size; i++) {
    if (values[i] >= threshold) {
      keep_values[counter] = values[i];
      keep_indices[counter] = i;
      counter++;
    }
  }
  return counter;
}

bool ValidateBoxes(const float* decoded_boxes, const int num_boxes) {
  for (int i = 0; i < num_boxes; ++i) {
    // ymax>=ymin, xmax>=xmin
    auto& box = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[i];
    if (box.ymin >= box.ymax || box.xmin >= box.xmax) {
      return false;
    }
  }
  return true;
}

float ComputeIntersectionOverUnion(const float* decoded_boxes, const int i,
                                   const int j) {
  auto& box_i = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[i];
  auto& box_j = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes)[j];
  const float area_i = (box_i.ymax - box_i.ymin) * (box_i.xmax - box_i.xmin);
  const float area_j = (box_j.ymax - box_j.ymin) * (box_j.xmax - box_j.xmin);
  if (area_i <= 0 || area_j <= 0) return 0.0;
  const float intersection_ymin = std::max<float>(box_i.ymin, box_j.ymin);
  const float intersection_xmin = std::max<float>(box_i.xmin, box_j.xmin);
  const float intersection_ymax = std::min<float>(box_i.ymax, box_j.ymax);
  const float intersection_xmax = std::min<float>(box_i.xmax, box_j.xmax);
  const float intersection_area =
      std::max<float>(intersection_ymax - intersection_ymin, 0.0) *
      std::max<float>(intersection_xmax - intersection_xmin, 0.0);
  return intersection_area / (area_i + area_j - intersection_area);
}
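// Worked IoU example with two assumed overlapping boxes:
// box_i = (ymin 0, xmin 0, ymax 2, xmax 2) and box_j = (1, 1, 3, 3):
//   area_i = 4, area_j = 4
//   intersection = max(2 - 1, 0) * max(2 - 1, 0) = 1
//   IoU = 1 / (4 + 4 - 1) = 1/7 ~= 0.143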

// NonMaxSuppressionSingleClass() prunes out box locations with high overlap
// before selecting the highest-scoring boxes (max_detections of them).
// It assumes all boxes are good at the start and sorts them by score.
// If a lower-scoring box overlaps too much with a higher-scoring box,
// the lower-scoring box is discarded.
// Complexity is O(N^2): pairwise comparisons between boxes.
TfLiteStatus NonMaxSuppressionSingleClassHelper(
    TfLiteContext* context, TfLiteNode* node, OpData* op_data,
    const float* scores, int* selected, int* selected_size,
    int max_detections) {
  const TfLiteEvalTensor* input_box_encodings =
      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
  const int num_boxes = input_box_encodings->dims->data[1];
  const float non_max_suppression_score_threshold =
      op_data->non_max_suppression_score_threshold;
  const float intersection_over_union_threshold =
      op_data->intersection_over_union_threshold;
  // Maximum detections should be non-negative.
  TF_LITE_ENSURE(context, (max_detections >= 0));
  // intersection_over_union_threshold should be positive
  // and should be at most 1.
  TF_LITE_ENSURE(context, (intersection_over_union_threshold > 0.0f) &&
                              (intersection_over_union_threshold <= 1.0f));
  // Validate boxes
  float* decoded_boxes = reinterpret_cast<float*>(
      context->GetScratchBuffer(context, op_data->decoded_boxes_idx));

  TF_LITE_ENSURE(context, ValidateBoxes(decoded_boxes, num_boxes));

  // Threshold scores
  int* keep_indices = reinterpret_cast<int*>(
      context->GetScratchBuffer(context, op_data->keep_indices_idx));
  float* keep_scores = reinterpret_cast<float*>(
      context->GetScratchBuffer(context, op_data->keep_scores_idx));
  int num_scores_kept = SelectDetectionsAboveScoreThreshold(
      scores, num_boxes, non_max_suppression_score_threshold, keep_scores,
      keep_indices);
  int* sorted_indices = reinterpret_cast<int*>(
      context->GetScratchBuffer(context, op_data->sorted_indices_idx));

  // Reuse keep_indices as a scratch buffer for the sort and write back its
  // values after the sorting is done.
  DecreasingArgSort(keep_scores, num_scores_kept, sorted_indices, keep_indices);
  int counter = 0;
  for (int i = 0; i < num_boxes; i++) {
    if (scores[i] >= non_max_suppression_score_threshold) {
      keep_indices[counter] = i;
      counter++;
    }
  }

  const int num_boxes_kept = num_scores_kept;
  const int output_size = std::min(num_boxes_kept, max_detections);
  *selected_size = 0;

  int num_active_candidate = num_boxes_kept;
  uint8_t* active_box_candidate = reinterpret_cast<uint8_t*>(
      context->GetScratchBuffer(context, op_data->active_candidate_idx));

  for (int row = 0; row < num_boxes_kept; row++) {
    active_box_candidate[row] = 1;
  }
  for (int i = 0; i < num_boxes_kept; ++i) {
    if (num_active_candidate == 0 || *selected_size >= output_size) break;
    if (active_box_candidate[i] == 1) {
      selected[(*selected_size)++] = keep_indices[sorted_indices[i]];
      active_box_candidate[i] = 0;
      num_active_candidate--;
    } else {
      continue;
    }
    for (int j = i + 1; j < num_boxes_kept; ++j) {
      if (active_box_candidate[j] == 1) {
        float intersection_over_union = ComputeIntersectionOverUnion(
            decoded_boxes, keep_indices[sorted_indices[i]],
            keep_indices[sorted_indices[j]]);

        if (intersection_over_union > intersection_over_union_threshold) {
          active_box_candidate[j] = 0;
          num_active_candidate--;
        }
      }
    }
  }

  return kTfLiteOk;
}

// This function implements the regular version of Non-Max Suppression (NMS)
// for multiple classes, where
// 1) NMS is done separately for each class across all anchors, and
// 2) only the highest anchor scores across all classes are kept.
// 3) The worst-case runtime of regular NMS is O(K*N^2), where N is the number
//    of anchors and K the number of classes.
TfLiteStatus NonMaxSuppressionMultiClassRegularHelper(TfLiteContext* context,
                                                      TfLiteNode* node,
                                                      OpData* op_data,
                                                      const float* scores) {
  const TfLiteEvalTensor* input_box_encodings =
      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
  const TfLiteEvalTensor* input_class_predictions =
      tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
  TfLiteEvalTensor* detection_boxes =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes);
  TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput(
      context, node, kOutputTensorDetectionClasses);
  TfLiteEvalTensor* detection_scores =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores);
  TfLiteEvalTensor* num_detections =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections);

  const int num_boxes = input_box_encodings->dims->data[1];
  const int num_classes = op_data->num_classes;
  const int num_detections_per_class = op_data->detections_per_class;
  const int max_detections = op_data->max_detections;
  const int num_classes_with_background =
      input_class_predictions->dims->data[2];
  // The row index offset is 1 if background class is included and 0 otherwise.
  int label_offset = num_classes_with_background - num_classes;
  TF_LITE_ENSURE(context, num_detections_per_class > 0);

  // For each class, perform non-max suppression.
  float* class_scores = reinterpret_cast<float*>(
      context->GetScratchBuffer(context, op_data->score_buffer_idx));
  int* box_indices_after_regular_non_max_suppression = reinterpret_cast<int*>(
      context->GetScratchBuffer(context, op_data->buffer_idx));
  float* scores_after_regular_non_max_suppression =
      reinterpret_cast<float*>(context->GetScratchBuffer(
          context, op_data->scores_after_regular_non_max_suppression_idx));

  int size_of_sorted_indices = 0;
  int* sorted_indices = reinterpret_cast<int*>(
      context->GetScratchBuffer(context, op_data->sorted_indices_idx));
  float* sorted_values = reinterpret_cast<float*>(
      context->GetScratchBuffer(context, op_data->sorted_values_idx));

  for (int col = 0; col < num_classes; col++) {
    for (int row = 0; row < num_boxes; row++) {
      // Get scores of boxes corresponding to all anchors for a single class
      class_scores[row] =
          *(scores + row * num_classes_with_background + col + label_offset);
    }
    // Perform non-max suppression on a single class
    int selected_size = 0;
    int* selected = reinterpret_cast<int*>(
        context->GetScratchBuffer(context, op_data->selected_idx));
    TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper(
        context, node, op_data, class_scores, selected, &selected_size,
        num_detections_per_class));
    // Add selected indices from non-max suppression of boxes in this class
    int output_index = size_of_sorted_indices;
    for (int i = 0; i < selected_size; i++) {
      int selected_index = selected[i];

      box_indices_after_regular_non_max_suppression[output_index] =
          (selected_index * num_classes_with_background + col + label_offset);
      scores_after_regular_non_max_suppression[output_index] =
          class_scores[selected_index];
      output_index++;
    }
    // Sort the max scores among the selected indices
    // Get the indices for top scores
    int num_indices_to_sort = std::min(output_index, max_detections);
    DecreasingPartialArgSort(scores_after_regular_non_max_suppression,
                             output_index, num_indices_to_sort, sorted_indices);

    // Copy values to temporary vectors
    for (int row = 0; row < num_indices_to_sort; row++) {
      int temp = sorted_indices[row];
      sorted_indices[row] = box_indices_after_regular_non_max_suppression[temp];
      sorted_values[row] = scores_after_regular_non_max_suppression[temp];
    }
    // Copy scores and indices from temporary vectors
    for (int row = 0; row < num_indices_to_sort; row++) {
      box_indices_after_regular_non_max_suppression[row] = sorted_indices[row];
      scores_after_regular_non_max_suppression[row] = sorted_values[row];
    }
    size_of_sorted_indices = num_indices_to_sort;
  }

  // Fill the output tensors
  for (int output_box_index = 0; output_box_index < max_detections;
       output_box_index++) {
    if (output_box_index < size_of_sorted_indices) {
      const int anchor_index = floor(
          box_indices_after_regular_non_max_suppression[output_box_index] /
          num_classes_with_background);
      const int class_index =
          box_indices_after_regular_non_max_suppression[output_box_index] -
          anchor_index * num_classes_with_background - label_offset;
      const float selected_score =
          scores_after_regular_non_max_suppression[output_box_index];
      // detection_boxes
      float* decoded_boxes = reinterpret_cast<float*>(
          context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
      ReInterpretTensor<BoxCornerEncoding*>(detection_boxes)[output_box_index] =
          reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[anchor_index];
      // detection_classes
      tflite::micro::GetTensorData<float>(detection_classes)[output_box_index] =
          class_index;
      // detection_scores
      tflite::micro::GetTensorData<float>(detection_scores)[output_box_index] =
          selected_score;
    } else {
      ReInterpretTensor<BoxCornerEncoding*>(
          detection_boxes)[output_box_index] = {0.0f, 0.0f, 0.0f, 0.0f};
      // detection_classes
      tflite::micro::GetTensorData<float>(detection_classes)[output_box_index] =
          0.0f;
      // detection_scores
      tflite::micro::GetTensorData<float>(detection_scores)[output_box_index] =
          0.0f;
    }
  }
  tflite::micro::GetTensorData<float>(num_detections)[0] =
      size_of_sorted_indices;

  return kTfLiteOk;
}

// This function implements a fast version of Non-Max Suppression for
// multiple classes, where
// 1) we keep the top-k scores for each anchor, and
// 2) during NMS, each anchor only uses the highest class score for sorting.
// 3) Compared to standard NMS, the worst-case runtime of this version is
//    O(N^2) instead of O(K*N^2), where N is the number of anchors and K the
//    number of classes.
TfLiteStatus NonMaxSuppressionMultiClassFastHelper(TfLiteContext* context,
                                                   TfLiteNode* node,
                                                   OpData* op_data,
                                                   const float* scores) {
  const TfLiteEvalTensor* input_box_encodings =
      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
  const TfLiteEvalTensor* input_class_predictions =
      tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
  TfLiteEvalTensor* detection_boxes =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes);

  TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput(
      context, node, kOutputTensorDetectionClasses);
  TfLiteEvalTensor* detection_scores =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores);
  TfLiteEvalTensor* num_detections =
      tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections);

  const int num_boxes = input_box_encodings->dims->data[1];
  const int num_classes = op_data->num_classes;
  const int max_categories_per_anchor = op_data->max_classes_per_detection;
  const int num_classes_with_background =
      input_class_predictions->dims->data[2];

  // The row index offset is 1 if background class is included and 0 otherwise.
  int label_offset = num_classes_with_background - num_classes;
  TF_LITE_ENSURE(context, (max_categories_per_anchor > 0));
  const int num_categories_per_anchor =
      std::min(max_categories_per_anchor, num_classes);
  float* max_scores = reinterpret_cast<float*>(
      context->GetScratchBuffer(context, op_data->score_buffer_idx));
  int* sorted_class_indices = reinterpret_cast<int*>(
      context->GetScratchBuffer(context, op_data->buffer_idx));

  for (int row = 0; row < num_boxes; row++) {
    const float* box_scores =
        scores + row * num_classes_with_background + label_offset;
    int* class_indices = sorted_class_indices + row * num_classes;
    DecreasingPartialArgSort(box_scores, num_classes, num_categories_per_anchor,
                             class_indices);
    max_scores[row] = box_scores[class_indices[0]];
  }

  // Perform non-max suppression on max scores
  int selected_size = 0;
  int* selected = reinterpret_cast<int*>(
      context->GetScratchBuffer(context, op_data->selected_idx));
  TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper(
      context, node, op_data, max_scores, selected, &selected_size,
      op_data->max_detections));

  // Fill the output tensors
  int output_box_index = 0;

  for (int i = 0; i < selected_size; i++) {
    int selected_index = selected[i];

    const float* box_scores =
        scores + selected_index * num_classes_with_background + label_offset;
    const int* class_indices =
        sorted_class_indices + selected_index * num_classes;

    for (int col = 0; col < num_categories_per_anchor; ++col) {
      int box_offset = num_categories_per_anchor * output_box_index + col;

      // detection_boxes
      float* decoded_boxes = reinterpret_cast<float*>(
          context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
      ReInterpretTensor<BoxCornerEncoding*>(detection_boxes)[box_offset] =
          reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[selected_index];

      // detection_classes
      tflite::micro::GetTensorData<float>(detection_classes)[box_offset] =
          class_indices[col];

      // detection_scores
      tflite::micro::GetTensorData<float>(detection_scores)[box_offset] =
          box_scores[class_indices[col]];

      output_box_index++;
    }
  }

  tflite::micro::GetTensorData<float>(num_detections)[0] = output_box_index;
  return kTfLiteOk;
}

TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context,
                                         TfLiteNode* node, OpData* op_data) {
  // Get the input tensors
  const TfLiteEvalTensor* input_box_encodings =
      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
  const TfLiteEvalTensor* input_class_predictions =
      tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
  const int num_boxes = input_box_encodings->dims->data[1];
  const int num_classes = op_data->num_classes;

  TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[0],
                    kBatchSize);
  TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[1], num_boxes);
  const int num_classes_with_background =
      input_class_predictions->dims->data[2];

  TF_LITE_ENSURE(context, (num_classes_with_background - num_classes <= 1));
  TF_LITE_ENSURE(context, (num_classes_with_background >= num_classes));

  const float* scores;
  switch (input_class_predictions->type) {
    case kTfLiteFloat32:
      scores = tflite::micro::GetTensorData<float>(input_class_predictions);
      break;
    default:
      // Unsupported type.
      return kTfLiteError;
  }

  if (op_data->use_regular_non_max_suppression) {
    TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClassRegularHelper(
        context, node, op_data, scores));
  } else {
    TF_LITE_ENSURE_STATUS(
        NonMaxSuppressionMultiClassFastHelper(context, node, op_data, scores));
  }

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, (kBatchSize == 1));
  auto* op_data = static_cast<OpData*>(node->user_data);

  // These two functions correspond to two blocks in the object detection
  // model. In the future, we would like to split the custom op into two
  // blocks, which is currently not feasible because we want to take quantized
  // inputs but do all calculations in float. Mixed quantized/float
  // calculations are currently not supported in TFLite.

  // This fills in the temporary decoded_boxes buffer by transforming
  // input_box_encodings and input_anchors from CenterSizeEncoding to
  // BoxCornerEncoding.
  TF_LITE_ENSURE_STATUS(DecodeCenterSizeBoxes(context, node, op_data));

  // This fills in the output tensors by choosing an effective set of decoded
  // boxes via Non-Max Suppression, i.e. selecting the highest-scoring
  // non-overlapping boxes.
  TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClass(context, node, op_data));

  return kTfLiteOk;
}
} // namespace

TfLiteRegistration* Register_DETECTION_POSTPROCESS() {
  static TfLiteRegistration r = tflite::micro::RegisterOp(Init, Prepare, Eval);
  return &r;
}

} // namespace tflite
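As a custom op, this kernel is not in the builtin op table, so the application has to register it with the resolver by name. A hedged sketch of that wiring, where the op name "TFLite_Detection_PostProcess" follows common TFLM/exporter usage and is an assumption here:

```
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

TfLiteStatus RegisterOps(tflite::MicroMutableOpResolver<8>& resolver) {
  // Assumed op name emitted by the TFLite exporter for this custom op.
  return resolver.AddCustom("TFLite_Detection_PostProcess",
                            tflite::Register_DETECTION_POSTPROCESS());
}
```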
@@ -1,25 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H
#define TENSORFLOW_LITE_MICRO_KERNELS_FLEXBUFFERS_GENERATED_DATA_H

extern const int g_gen_data_size_none_regular_nms;
extern const unsigned char g_gen_data_none_regular_nms[];

extern const int g_gen_data_size_regular_nms;
extern const unsigned char g_gen_data_regular_nms[];

#endif
@@ -1,11 +0,0 @@
# Info

These are the Espressif chipset-specific replacement kernels.
The kernels call optimized routines or reference routines depending on the optimization option selected.

By default, optimizations are selected if available.
To change this behaviour, please make the appropriate `ESP-NN` menu selection after running:

```
idf.py menuconfig
```
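The selection is compiled in through an `ESP_NN` preprocessor guard, as sketched below; the optimized call is illustrative (an assumed ESP-NN entry point, not taken from a specific kernel):

```
#if ESP_NN
  // Optimized path provided by the ESP-NN component.
  esp_nn_some_op_s8(data, size);  // illustrative name, not a real entry point
#else
  // Portable reference fallback.
  for (int i = 0; i < size; ++i) {
    data[i] = data[i] > 0 ? data[i] : 0;
  }
#endif
```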
@@ -1,202 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/add.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

#include <esp_timer.h>

#if ESP_NN
#include <esp_nn.h>
#endif

long long add_total_time = 0;

namespace tflite {

void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
             const OpDataAdd* data, const TfLiteEvalTensor* input1,
             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params;
  SetActivationParams(data->output_activation_min_f32,
                      data->output_activation_max_f32, &op_params);
  if (data->requires_broadcast) {
    reference_ops::BroadcastAdd4DSlow(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
  } else {
    reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
                       tflite::micro::GetTensorData<float>(input1),
                       tflite::micro::GetTensorShape(input2),
                       tflite::micro::GetTensorData<float>(input2),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));
  }
}

TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLiteAddParams* params, const OpDataAdd* data,
                              const TfLiteEvalTensor* input1,
                              const TfLiteEvalTensor* input2,
                              TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params;
  op_params.left_shift = data->left_shift;
  op_params.input1_offset = data->input1_offset;
  op_params.input1_multiplier = data->input1_multiplier;
  op_params.input1_shift = data->input1_shift;
  op_params.input2_offset = data->input2_offset;
  op_params.input2_multiplier = data->input2_multiplier;
  op_params.input2_shift = data->input2_shift;
  op_params.output_offset = data->output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = data->output_shift;
  SetActivationParams(data->output_activation_min, data->output_activation_max,
                      &op_params);
  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  switch (output->type) {
    case kTfLiteInt8: {
      if (need_broadcast) {
        reference_integer_ops::BroadcastAdd4DSlow(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      } else {
#if ESP_NN
        const int8_t* input1_data = tflite::micro::GetTensorData<int8_t>(input1);
        const int8_t* input2_data = tflite::micro::GetTensorData<int8_t>(input2);
        int8_t* out_data = tflite::micro::GetTensorData<int8_t>(output);

        esp_nn_add_elementwise_s8(input1_data,
                                  input2_data,
                                  data->input1_offset,
                                  data->input2_offset,
                                  data->input1_multiplier,
                                  data->input2_multiplier,
                                  data->input1_shift,
                                  data->input2_shift,
                                  data->left_shift,
                                  out_data,
                                  data->output_offset,
                                  data->output_multiplier,
                                  data->output_shift,
                                  data->output_activation_min,
                                  data->output_activation_max,
                                  MatchingElementsSize(
                                      tflite::micro::GetTensorShape(input1),
                                      tflite::micro::GetTensorShape(input2),
                                      tflite::micro::GetTensorShape(output)));
#else
        reference_integer_ops::Add(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
#endif
      }
      break;
    }
    case kTfLiteInt16: {
      if (need_broadcast) {
        reference_ops::BroadcastAdd4DSlow(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int16_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int16_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int16_t>(output));
      } else {
        reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
                           tflite::micro::GetTensorData<int16_t>(input1),
                           tflite::micro::GetTensorShape(input2),
                           tflite::micro::GetTensorData<int16_t>(input2),
                           tflite::micro::GetTensorShape(output),
                           tflite::micro::GetTensorData<int16_t>(output),
                           false);
      }
      break;
    }
    default:
      MicroPrintf("Type %s (%d) not supported.",
                  TfLiteTypeGetName(output->type), output->type);
      return kTfLiteError;
  }

  return kTfLiteOk;
}

void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
}

TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);

  long long start_time = esp_timer_get_time();

  if (output->type == kTfLiteFloat32) {
    EvalAdd(context, node, params, data, input1, input2, output);
  } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
    TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
                                                input1, input2, output));
  } else {
    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
                output->type);
    return kTfLiteError;
  }
  add_total_time += esp_timer_get_time() - start_time;

  return kTfLiteOk;
}

TfLiteRegistration Register_ADD() {
  return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval);
}

} // namespace tflite
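In the quantized path above, both the ESP-NN and reference kernels compute the same arithmetic: offset each input by its (negated) zero point, scale it, add, then requantize to the output scale with a fused activation clamp. A float-domain sketch of one element, with all parameter values assumed (the real kernels do this with integer fixed-point multipliers and shifts):

```
#include <algorithm>
#include <cmath>
#include <cstdint>

// in1_offset/in2_offset follow the TFLite convention of negated zero points,
// so (q + offset) recovers the zero-point-corrected value.
int8_t AddOneElementSketch(int8_t q1, int8_t q2,
                           int in1_offset, double in1_scale,
                           int in2_offset, double in2_scale,
                           int out_offset, double out_scale,
                           int act_min, int act_max) {
  const double real = (q1 + in1_offset) * in1_scale +
                      (q2 + in2_offset) * in2_scale;
  int32_t q = static_cast<int32_t>(std::lround(real / out_scale)) + out_offset;
  q = std::min(std::max(q, act_min), act_max);  // fused activation clamp
  return static_cast<int8_t>(q);
}
```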
@@ -1,191 +0,0 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/kernels/fully_connected.h"
|
||||
|
||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/common.h"
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/kernel_util.h"
|
||||
|
||||
#if ESP_NN
|
||||
#include <esp_nn.h>
|
||||
#endif
|
||||
|
||||
#include <esp_timer.h>
|
||||
|
||||
long long fc_total_time = 0;
|
||||
|
||||
namespace tflite {
|
||||
namespace {
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
return context->AllocatePersistentBuffer(context,
|
||||
sizeof(OpDataFullyConnected));
|
||||
}
|
||||
|
||||
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
MicroContext* micro_context = GetMicroContext(context);
|
||||
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
TFLITE_DCHECK(node->builtin_data != nullptr);
|
||||
|
||||
auto* data = static_cast<OpDataFullyConnected*>(node->user_data);
|
||||
const auto params =
|
||||
static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
|
||||
|
||||
TfLiteTensor* input =
|
||||
micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
|
||||
TF_LITE_ENSURE(context, input != nullptr);
|
||||
TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
|
||||
node, kFullyConnectedWeightsTensor);
|
||||
  TF_LITE_ENSURE(context, filter != nullptr);
  TfLiteTensor* bias =
      micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
      node, kFullyConnectedOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
                     "Hybrid models are not supported on TFLite Micro.");

  TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected(
                                 context, params->activation, input->type,
                                 input, filter, bias, output, data));

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(filter);
  if (bias != nullptr) {
    micro_context->DeallocateTempTfLiteTensor(bias);
  }
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  const auto* params =
      static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor);
  const TfLiteEvalTensor* bias =
      tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const auto& data =
      *(static_cast<const OpDataFullyConnected*>(node->user_data));

  long long start_time = esp_timer_get_time();
  // Checks in Prepare ensure input, output and filter types are all the same.
  switch (input->type) {
    case kTfLiteFloat32: {
      tflite::reference_ops::FullyConnected(
          FullyConnectedParamsFloat(params->activation),
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<float>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<float>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output));
      break;
    }

    case kTfLiteInt8: {
      const int32_t* bias_data =
          nullptr != bias ? tflite::micro::GetTensorData<int32_t>(bias)
                          : nullptr;
#if ESP_NN
      const RuntimeShape& filter_shape = tflite::micro::GetTensorShape(filter);
      const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
      const int filter_dim_count = filter_shape.DimensionsCount();
      const int batches = output_shape.Dims(0);
      const int output_depth = output_shape.Dims(1);
      TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
      const int accum_depth = filter_shape.Dims(filter_dim_count - 1);

      const int8_t* input_data = tflite::micro::GetTensorData<int8_t>(input);
      int8_t* output_data = tflite::micro::GetTensorData<int8_t>(output);
      const int8_t* filter_data = tflite::micro::GetTensorData<int8_t>(filter);

      for (int b = 0; b < batches; ++b) {
        esp_nn_fully_connected_s8(input_data, -data.input_zero_point,
                                  accum_depth,
                                  filter_data, -data.filter_zero_point,
                                  bias_data, output_data, output_depth,
                                  data.output_zero_point,
                                  data.output_shift, data.output_multiplier,
                                  data.output_activation_min,
                                  data.output_activation_max);
        input_data += accum_depth;
        output_data += output_depth;
      }
#else
      tflite::reference_integer_ops::FullyConnected(
          FullyConnectedParamsQuantized(data),
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<int8_t>(filter),
          tflite::micro::GetTensorShape(bias), bias_data,
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
#endif
      break;
    }

    case kTfLiteUInt8: {
      tflite::reference_ops::FullyConnected(
          FullyConnectedParamsQuantized(data),
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<uint8_t>(input),
          tflite::micro::GetTensorShape(filter),
          tflite::micro::GetTensorData<uint8_t>(filter),
          tflite::micro::GetTensorShape(bias),
          tflite::micro::GetTensorData<int32_t>(bias),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<uint8_t>(output));
      break;
    }
    default: {
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                         TfLiteTypeGetName(input->type), input->type);
      return kTfLiteError;
    }
  }
  fc_total_time += esp_timer_get_time() - start_time;
  return kTfLiteOk;
}

} // namespace

TfLiteRegistration Register_FULLY_CONNECTED() {
  return tflite::micro::RegisterOp(Init, Prepare, Eval);
}

} // namespace tflite
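// Editor's note: a minimal usage sketch, not part of the original file. It
// assumes the standard TFLM MicroMutableOpResolver API; fc_total_time is the
// profiling accumulator defined at the top of this port's kernel.
//
//   tflite::MicroMutableOpResolver<1> resolver;
//   resolver.AddFullyConnected(tflite::Register_FULLY_CONNECTED());
//   // ... interpreter->Invoke() ...
//   printf("FULLY_CONNECTED total: %lld us\n", fc_total_time);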
@@ -1,124 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/mul.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

#if ESP_NN
#include <esp_nn.h>
#endif

#include <esp_timer.h>

long long mul_total_time = 0;

namespace tflite {
#if ESP_NN
void MulEvalQuantized(TfLiteContext* context, TfLiteNode* node,
                      const OpDataMul* data, const TfLiteEvalTensor* input1,
                      const TfLiteEvalTensor* input2,
                      TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params = {};
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  op_params.float_activation_max = data->output_activation_max_f32;
  op_params.input1_offset = -data->input1_zero_point;
  op_params.input2_offset = -data->input2_zero_point;
  op_params.output_offset = data->output_zero_point;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = data->output_shift;

  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  if (need_broadcast) {
    reference_integer_ops::BroadcastMul4DSlow(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<int8_t>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<int8_t>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<int8_t>(output));
  } else {
    const int8_t* input1_data = tflite::micro::GetTensorData<int8_t>(input1);
    const int8_t* input2_data = tflite::micro::GetTensorData<int8_t>(input2);
    int8_t* out_data = tflite::micro::GetTensorData<int8_t>(output);

    esp_nn_mul_elementwise_s8(input1_data, input2_data, op_params.input1_offset,
        op_params.input2_offset, out_data, op_params.output_offset,
        op_params.output_multiplier, op_params.output_shift,
        op_params.quantized_activation_min, op_params.quantized_activation_max,
        MatchingElementsSize(tflite::micro::GetTensorShape(input1),
                             tflite::micro::GetTensorShape(input2),
                             tflite::micro::GetTensorShape(output)));
  }
}
#endif

TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);

  long long start_time = esp_timer_get_time();
  switch (input1->type) {
    case kTfLiteInt8:
#if ESP_NN
      MulEvalQuantized(context, node, data, input1, input2, output);
#else
      EvalMulQuantizedReference(context, node, data, input1, input2, output);
#endif
      break;
    case kTfLiteInt32:
      EvalMulQuantizedReference(context, node, data, input1, input2, output);
      break;
    case kTfLiteFloat32:
      EvalMulFloatReference(context, node, params, data, input1, input2,
                            output);
      break;
    default:
      MicroPrintf("Type %s (%d) not supported.",
                  TfLiteTypeGetName(input1->type), input1->type);
      return kTfLiteError;
  }
  mul_total_time += esp_timer_get_time() - start_time;
  return kTfLiteOk;
}

TfLiteRegistration Register_MUL() {
  return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval);
}

} // namespace tflite
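// Editor's note (not part of the original file): the sign convention above is
// easy to misread. TFLite dequantizes as real = scale * (q - zero_point), and
// the kernels compute (q + offset), so the offsets are stored negated:
// input1_offset = -input1_zero_point. A worked int8 example with made-up
// parameters, scale = 0.5 and zero_point = -128: q = -120 represents
// 0.5 * (-120 - (-128)) = 4.0, and the kernel sees -120 + 128 = 8, the
// integer that scale maps back to 4.0.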
@@ -1,231 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/pooling.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"

#if ESP_NN
#include <esp_nn.h>
#endif

#include <esp_timer.h>

long long pooling_total_time = 0;

namespace tflite {

namespace {
#if ESP_NN
void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
                          const TfLitePoolParams* params,
                          const OpDataPooling* data,
                          const TfLiteEvalTensor* input,
                          TfLiteEvalTensor* output) {

  const int stride_height = params->stride_height;
  const int stride_width = params->stride_width;
  const int filter_height = params->filter_height;
  const int filter_width = params->filter_width;
  const int activation_min = data->activation_min;
  const int activation_max = data->activation_max;
  const int pad_height = data->padding.height;
  const int pad_width = data->padding.width;

  const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
  const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
  TFLITE_DCHECK_LE(activation_min, activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);

  const int8_t* input_data = tflite::micro::GetTensorData<int8_t>(input);
  int8_t* output_data = tflite::micro::GetTensorData<int8_t>(output);

  const int input_size = input_width * input_height * depth;
  const int output_size = output_width * output_height * depth;

  if (depth % 4 == 0) { // S3 version only supports channels multiple of 4
    for (int batch = 0; batch < batches; ++batch) {
      esp_nn_avg_pool_s8(input_data, input_width, input_height,
                         output_data, output_width, output_height,
                         stride_width, stride_height,
                         filter_width, filter_height,
                         pad_width, pad_height,
                         activation_min, activation_max, depth);
      input_data += input_size;
      output_data += output_size;
    }
  } else {
    for (int batch = 0; batch < batches; ++batch) {
      esp_nn_avg_pool_s8_ansi(input_data, input_width, input_height,
                              output_data, output_width, output_height,
                              stride_width, stride_height,
                              filter_width, filter_height,
                              pad_width, pad_height,
                              activation_min, activation_max, depth);
      input_data += input_size;
      output_data += output_size;
    }
  }
}

void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
                      TfLitePoolParams* params, const OpDataPooling* data,
                      const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {

  const int stride_height = params->stride_height;
  const int stride_width = params->stride_width;
  const int filter_height = params->filter_height;
  const int filter_width = params->filter_width;
  const int activation_min = data->activation_min;
  const int activation_max = data->activation_max;
  const int pad_height = data->padding.height;
  const int pad_width = data->padding.width;

  const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
  const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
  TFLITE_DCHECK_LE(activation_min, activation_max);
  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
  const int input_height = input_shape.Dims(1);
  const int input_width = input_shape.Dims(2);
  const int output_height = output_shape.Dims(1);
  const int output_width = output_shape.Dims(2);

  const int8_t* input_data = tflite::micro::GetTensorData<int8_t>(input);
  int8_t* output_data = tflite::micro::GetTensorData<int8_t>(output);

  const int input_size = input_width * input_height * depth;
  const int output_size = output_width * output_height * depth;
  if (depth % 4 == 0) { // S3 version only supports channels multiple of 4
    for (int batch = 0; batch < batches; ++batch) {
      esp_nn_max_pool_s8(input_data, input_width, input_height,
                         output_data, output_width, output_height,
                         stride_width, stride_height,
                         filter_width, filter_height,
                         pad_width, pad_height,
                         activation_min, activation_max, depth);
      input_data += input_size;
      output_data += output_size;
    }
  } else {
    for (int batch = 0; batch < batches; ++batch) {
      esp_nn_max_pool_s8_ansi(input_data, input_width, input_height,
                              output_data, output_width, output_height,
                              stride_width, stride_height,
                              filter_width, filter_height,
                              pad_width, pad_height,
                              activation_min, activation_max, depth);
      input_data += input_size;
      output_data += output_size;
    }
  }
}
#endif

TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpDataPooling* data =
      static_cast<const OpDataPooling*>(node->user_data);

  const TfLiteEvalTensor* input =
      micro::GetEvalInput(context, node, kPoolingInputTensor);
  TfLiteEvalTensor* output =
      micro::GetEvalOutput(context, node, kPoolingOutputTensor);

  long long start_time = esp_timer_get_time();
  // Inputs and outputs share the same type, guaranteed by the converter.
  switch (input->type) {
    case kTfLiteFloat32:
      AveragePoolingEvalFloat(context, node, params, data, input, output);
      break;
    case kTfLiteInt8:
#if ESP_NN
      AverageEvalQuantized(context, node, params, data, input, output);
#else
      AveragePoolingEvalQuantized(context, node, params, data, input, output);
#endif
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
  pooling_total_time += esp_timer_get_time() - start_time;
  return kTfLiteOk;
}

TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpDataPooling* data =
      static_cast<const OpDataPooling*>(node->user_data);

  const TfLiteEvalTensor* input =
      micro::GetEvalInput(context, node, kPoolingInputTensor);
  TfLiteEvalTensor* output =
      micro::GetEvalOutput(context, node, kPoolingOutputTensor);

  long long start_time = esp_timer_get_time();
  switch (input->type) {
    case kTfLiteFloat32:
      MaxPoolingEvalFloat(context, node, params, data, input, output);
      break;
    case kTfLiteInt8:
#if ESP_NN
      MaxEvalQuantized(context, node, params, data, input, output);
#else
      MaxPoolingEvalQuantized(context, node, params, data, input, output);
#endif
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
                         TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }
  pooling_total_time += esp_timer_get_time() - start_time;
  return kTfLiteOk;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
}

} // namespace

TfLiteRegistration Register_AVERAGE_POOL_2D() {
  return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval);
}

TfLiteRegistration Register_MAX_POOL_2D() {
  return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval);
}

} // namespace tflite
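// Editor's note (not part of the original file): the depth % 4 dispatch above
// exists because, per the inline comment, the optimized ESP32-S3 pooling
// routines only handle channel counts that are a multiple of 4 (presumably
// matching the SIMD lane width); other tensors fall back to the portable
// esp_nn_*_pool_s8_ansi variants. Both paths advance the data pointers by one
// whole feature map (width * height * depth) per batch.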
@@ -1,27 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

//
// This is a stub file for non-Ethos platforms
//
#include "tensorflow/lite/c/common.h"

namespace tflite {

TfLiteRegistration* Register_ETHOSU() { return nullptr; }

const char* GetString_ETHOSU() { return ""; }

} // namespace tflite
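// Editor's note (not part of the original file): because this stub returns
// nullptr, callers must guard the registration before use. A minimal sketch,
// assuming a MicroMutableOpResolver named `resolver` is in scope:
//
//   TfLiteRegistration* ethosu = tflite::Register_ETHOSU();
//   if (ethosu != nullptr) {
//     resolver.AddCustom(tflite::GetString_ETHOSU(), ethosu);
//   }  // on non-Ethos builds this branch is simply skipped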
@@ -1,28 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_

#include "tensorflow/lite/c/common.h"

namespace tflite {

TfLiteRegistration* Register_ETHOSU();

const char* GetString_ETHOSU();

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_ETHOSU_H_
@@ -1,50 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/floor.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace floor {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  reference_ops::Floor(tflite::micro::GetTensorShape(input),
                       tflite::micro::GetTensorData<float>(input),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));
  return kTfLiteOk;
}
} // namespace floor

TfLiteRegistration Register_FLOOR() {
  return tflite::micro::RegisterOp(nullptr, nullptr, floor::Eval);
}

} // namespace micro
} // namespace ops
} // namespace tflite
@@ -1,104 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

struct OpDataFullyConnected {
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
  // The index of the temporary tensor where the quantized inputs are cached.
  int input_quantized_index;
  // Cached zero point values of tensors.
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;
};

extern const int kFullyConnectedInputTensor;
extern const int kFullyConnectedWeightsTensor;
extern const int kFullyConnectedBiasTensor;
extern const int kFullyConnectedOutputTensor;

// Returns a FullyConnectedParams struct with all the parameters needed for a
// float computation.
FullyConnectedParams FullyConnectedParamsFloat(
    TfLiteFusedActivation activation);

// Returns a FullyConnectedParams struct with all the parameters needed for a
// quantized computation.
FullyConnectedParams FullyConnectedParamsQuantized(
    const OpDataFullyConnected& op_data);

TfLiteStatus CalculateOpDataFullyConnected(
    TfLiteContext* context, TfLiteFusedActivation activation,
    TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
    const TfLiteTensor* bias, TfLiteTensor* output, OpDataFullyConnected* data);

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_FULLY_CONNECTED();

#if defined(CMSIS_NN) || defined(HEXAGON)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8.
TfLiteRegistration Register_FULLY_CONNECTED_INT8();

#else
// Note that while this block gets used for both reference and optimized kernels
// that do not have any specialized implementations, the only goal here is to
// define a fallback implementation that allows reference kernels to still be
// used from applications that call a more specific kernel variant.

inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() {
  return Register_FULLY_CONNECTED();
}

#endif

#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int16.
TfLiteRegistration Register_FULLY_CONNECTED_INT16();

#else
// Note that while this block gets used for both reference and optimized kernels
// that do not have any specialized implementations, the only goal here is to
// define a fallback implementation that allows reference kernels to still be
// used from applications that call a more specific kernel variant.

inline TfLiteRegistration Register_FULLY_CONNECTED_INT16() {
  return Register_FULLY_CONNECTED();
}

#endif

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
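// Editor's note (not part of the original file): a sketch of why the fallback
// registrations above exist. Application code can unconditionally request the
// int8-only variant and still link on targets built without CMSIS_NN/HEXAGON:
//
//   tflite::MicroMutableOpResolver<1> resolver;  // size 1 is just an example
//   resolver.AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
//
// On CMSIS-NN builds this resolves to the specialized int8 kernel; elsewhere
// the inline fallback simply forwards to Register_FULLY_CONNECTED().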
@@ -1,83 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {

const int kFullyConnectedInputTensor = 0;
const int kFullyConnectedWeightsTensor = 1;
const int kFullyConnectedBiasTensor = 2;
const int kFullyConnectedOutputTensor = 0;

FullyConnectedParams FullyConnectedParamsQuantized(
    const OpDataFullyConnected& op_data) {
  FullyConnectedParams op_params;
  op_params.input_offset = -op_data.input_zero_point;
  op_params.weights_offset = -op_data.filter_zero_point;
  op_params.output_offset = op_data.output_zero_point;
  op_params.output_multiplier = op_data.output_multiplier;
  op_params.output_shift = op_data.output_shift;
  op_params.quantized_activation_min = op_data.output_activation_min;
  op_params.quantized_activation_max = op_data.output_activation_max;
  return op_params;
}

FullyConnectedParams FullyConnectedParamsFloat(
    TfLiteFusedActivation activation) {
  FullyConnectedParams op_params;
  CalculateActivationRange(activation, &op_params.float_activation_min,
                           &op_params.float_activation_max);
  return op_params;
}

TfLiteStatus CalculateOpDataFullyConnected(
    TfLiteContext* context, TfLiteFusedActivation activation,
    TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
    const TfLiteTensor* bias, TfLiteTensor* output,
    OpDataFullyConnected* data) {
  if (data_type != kTfLiteFloat32) {
    double real_multiplier = 0.0;
    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
        context, input, filter, bias, output, &real_multiplier));
    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                       &data->output_shift);

    data->input_zero_point = input->params.zero_point;
    // Filter weights will always be symmetric quantized since we only support
    // int8 quantization. See
    // https://github.com/tensorflow/tensorflow/issues/44912 for additional
    // context.
    TFLITE_DCHECK(filter->params.zero_point == 0);
    data->filter_zero_point = filter->params.zero_point;
    data->output_zero_point = output->params.zero_point;

    return CalculateActivationRangeQuantized(context, activation, output,
                                             &data->output_activation_min,
                                             &data->output_activation_max);
  }
  return kTfLiteOk;
}

} // namespace tflite
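// Editor's note (not part of the original file): a worked example of the
// QuantizeMultiplier() step above, using made-up scales. The real multiplier
// for a fully connected op is input_scale * filter_scale / output_scale; with
// input_scale = 0.02, filter_scale = 0.01 and output_scale = 0.05 this gives
// 0.004. QuantizeMultiplier represents it as m * 2^shift with m a Q0.31
// fixed-point value in [0.5, 1): 0.004 = 0.512 * 2^-7, so
// output_multiplier = round(0.512 * 2^31) = 1099511628 and output_shift = -7.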
@@ -1,75 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace {
void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
}

TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kHardSwishInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kHardSwishOutputTensor);
  HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);

  switch (input->type) {
    case kTfLiteFloat32: {
      tflite::reference_ops::HardSwish<float>(
          tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<float>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<float>(output));
    } break;
    case kTfLiteInt8: {
      tflite::reference_ops::HardSwish<int8_t>(
          *params, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
    } break;
    default: {
      MicroPrintf("Unsupported type %s", TfLiteTypeGetName(input->type));
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

} // namespace

TfLiteRegistration Register_HARD_SWISH() {
  return tflite::micro::RegisterOp(HardSwishInit, tflite::HardSwishPrepare,
                                   HardSwishEval);
}

} // namespace tflite
@@ -1,30 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

namespace tflite {

extern const int kHardSwishInputTensor;
extern const int kHardSwishOutputTensor;

TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
@@ -1,86 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/hard_swish.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

const int kHardSwishInputTensor = 0;
const int kHardSwishOutputTensor = 0;

TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);

  TFLITE_DCHECK(node->user_data != nullptr);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kHardSwishInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kHardSwishOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  if (input->type == kTfLiteInt8) {
    HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);

    params->input_zero_point = input->params.zero_point;
    params->output_zero_point = output->params.zero_point;

    const float input_scale = input->params.scale;
    const float hires_input_scale = (1.0f / 128.0f) * input_scale;
    const float reluish_scale = 3.0f / 32768.0f;
    const float output_scale = output->params.scale;

    const double output_multiplier =
        static_cast<double>(hires_input_scale / output_scale);
    int32_t output_multiplier_fixedpoint_int32;
    QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
                       &params->output_multiplier_exponent);
    DownScaleInt32ToInt16Multiplier(
        output_multiplier_fixedpoint_int32,
        &params->output_multiplier_fixedpoint_int16);

    TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);

    const double reluish_multiplier =
        static_cast<double>(hires_input_scale / reluish_scale);
    int32_t reluish_multiplier_fixedpoint_int32;
    QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
                       &params->reluish_multiplier_exponent);
    DownScaleInt32ToInt16Multiplier(
        reluish_multiplier_fixedpoint_int32,
        &params->reluish_multiplier_fixedpoint_int16);
  }

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

} // namespace tflite
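// Editor's note (not part of the original file): the scales above come from
// the hard-swish definition h(x) = x * relu6(x + 3) / 6. The reference kernel
// evaluates the inner relu6 term on a higher-resolution int16 grid: the input
// is rescaled by 1/128 (hires_input_scale), and the [0, 6] relu6 range is
// mapped onto int16 via reluish_scale = 3 / 32768, i.e. 6 spread across the
// 65536 representable int16 values. The two QuantizeMultiplier() calls then
// express hires_input_scale / output_scale and
// hires_input_scale / reluish_scale as fixed-point multiplier/exponent pairs
// consumed by reference_ops::HardSwish<int8_t>.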
@@ -1,121 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <stddef.h>

#include <cstring>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace {

struct OpData {
  int then_subgraph_index;
  int else_subgraph_index;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
  const auto* params =
      reinterpret_cast<const TfLiteIfParams*>(node->builtin_data);
  op_data->then_subgraph_index = params->then_subgraph_index;
  op_data->else_subgraph_index = params->else_subgraph_index;

  TF_LITE_ENSURE(context, node->inputs->size > 0);

  // The first input is the condition.
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);

  TF_LITE_ENSURE(context, cond != nullptr);
  TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
  TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);

  micro_context->DeallocateTempTfLiteTensor(cond);

  // The first input of the node is the condition. The rest of the inputs are
  // passed to the branch subgraphs. Therefore, the number of subgraph inputs
  // will be the number of node inputs - 1.
  size_t num_inputs = node->inputs->size - 1;
  size_t num_outputs = node->outputs->size;

  MicroGraph& graph_info = micro_context->graph();

  TF_LITE_ENSURE(context,
                 op_data->then_subgraph_index < graph_info.NumSubgraphs());
  TF_LITE_ENSURE(context,
                 op_data->else_subgraph_index < graph_info.NumSubgraphs());

  TF_LITE_ENSURE_EQ(context, num_inputs,
                    graph_info.NumSubgraphInputs(op_data->then_subgraph_index));
  TF_LITE_ENSURE_EQ(
      context, num_outputs,
      graph_info.NumSubgraphOutputs(op_data->then_subgraph_index));

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);

  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);

  TF_LITE_ENSURE(context, cond != nullptr);
  bool cond_value = cond->data.b[0];
  micro_context->DeallocateTempTfLiteTensor(cond);

  MicroGraph* graph_info = &micro_context->graph();
  // Currently we copy the input / output between the subgraphs.
  int active_branch_subgraph_index =
      cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index;

  TF_LITE_ENSURE_OK(context,
                    tflite::micro::CopyOpInputsToSubgraphInputs(
                        context, node, graph_info, active_branch_subgraph_index,
                        /*first_tensor_idx=*/1));

  TF_LITE_ENSURE_OK(context,
                    graph_info->InvokeSubgraph(active_branch_subgraph_index));

  TF_LITE_ENSURE_OK(
      context, tflite::micro::CopySubgraphOutputsToOpOutputs(
                   context, node, graph_info, active_branch_subgraph_index));

  return kTfLiteOk;
}

} // namespace

TfLiteRegistration Register_IF() {
  return tflite::micro::RegisterOp(Init, Prepare, Eval);
}

} // namespace tflite
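// Editor's note (not part of the original file): a schematic of the tensor
// plumbing implemented above, for an IF node with condition c and one data
// input x, with then-subgraph 1 and else-subgraph 2:
//
//   node inputs:   [c, x]   c selects the branch (read in Eval)
//   subgraph in:   [x]      copied by CopyOpInputsToSubgraphInputs
//                           (first_tensor_idx = 1 skips the condition)
//   subgraph out:  [y]      copied back by CopySubgraphOutputsToOpOutputs
//
// Both branches are expected to take the same number of inputs and produce
// the same number of outputs; Prepare() checks the counts against the
// then-branch only, implicitly assuming the else-branch matches.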
@@ -1,95 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/leaky_relu.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {

template <typename T>
void QuantizeLeakyRelu(const LeakyReluOpData& data,
                       const TfLiteEvalTensor* input,
                       TfLiteEvalTensor* output) {
  LeakyReluParams op_params = {};

  op_params.input_offset = data.input_zero_point;
  op_params.output_offset = data.output_zero_point;
  op_params.output_multiplier_alpha = data.output_multiplier_alpha;
  op_params.output_shift_alpha = data.output_shift_alpha;
  op_params.output_multiplier_identity = data.output_multiplier_identity;
  op_params.output_shift_identity = data.output_shift_identity;
  reference_ops::QuantizeLeakyRelu(op_params,
                                   tflite::micro::GetTensorShape(input),
                                   tflite::micro::GetTensorData<T>(input),
                                   tflite::micro::GetTensorShape(output),
                                   tflite::micro::GetTensorData<T>(output));
}

void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(LeakyReluOpData));
}

TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
  const LeakyReluOpData& data = *static_cast<LeakyReluOpData*>(node->user_data);

  switch (input->type) {
    case kTfLiteFloat32: {
      LeakyReluParams op_params = {};
      const auto* params =
          static_cast<TfLiteLeakyReluParams*>(node->builtin_data);

      op_params.alpha = params->alpha;
      reference_ops::LeakyRelu(op_params, tflite::micro::GetTensorShape(input),
                               tflite::micro::GetTensorData<float>(input),
                               tflite::micro::GetTensorShape(output),
                               tflite::micro::GetTensorData<float>(output));
      return kTfLiteOk;
    } break;
    case kTfLiteInt8: {
      QuantizeLeakyRelu<int8_t>(data, input, output);
      return kTfLiteOk;
    } break;
    case kTfLiteInt16: {
      QuantizeLeakyRelu<int16_t>(data, input, output);
      return kTfLiteOk;
    } break;
    default:
      MicroPrintf("Only float32, int8 and int16 are supported by LEAKY_RELU, got %s.",
                  TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }

  return kTfLiteError;
}

TfLiteRegistration Register_LEAKY_RELU() {
  return tflite::micro::RegisterOp(LeakyReluInit, LeakyReluPrepare,
                                   LeakyReluEval);
}

} // namespace tflite
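// Editor's note (not part of the original file): the two multiplier pairs used
// above implement f(x) = x for x >= 0 and f(x) = alpha * x for x < 0 directly
// on quantized values. With x = s_in * (q_in - z_in) and
// y = s_out * (q_out - z_out), the positive branch needs the ratio
// s_in / s_out (the "identity" multiplier) and the negative branch needs
// s_in * alpha / s_out (the "alpha" multiplier); both are precomputed in
// CalculateOpDataLeakyRelu() so Eval() only does integer multiply/shift work.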
@@ -1,43 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_

#include "tensorflow/lite/c/common.h"

namespace tflite {

// Input/output tensor index.
extern const int kInputTensor;
extern const int kOutputTensor;

struct LeakyReluOpData {
  // quantization parameters
  int32_t output_multiplier_alpha;
  int32_t output_shift_alpha;
  int32_t output_multiplier_identity;
  int32_t output_shift_identity;
  int32_t input_zero_point;
  int32_t output_zero_point;
};

TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, TfLiteNode* node);

TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node);

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_
@@ -1,78 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/leaky_relu.h"

namespace tflite {

// Input/output tensor index.
const int kInputTensor = 0;
const int kOutputTensor = 0;

TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context,
                                      TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);

  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
    LeakyReluOpData* data = static_cast<LeakyReluOpData*>(node->user_data);
    const auto* params =
        static_cast<TfLiteLeakyReluParams*>(node->builtin_data);

    data->input_zero_point = input->params.zero_point;
    data->output_zero_point = output->params.zero_point;

    int output_shift_alpha;
    double alpha_multiplier = static_cast<double>(
        input->params.scale * params->alpha / output->params.scale);
    QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
                       &output_shift_alpha);
    data->output_shift_alpha = static_cast<int32_t>(output_shift_alpha);

    int output_shift_identity;
    double identity_multiplier =
        static_cast<double>(input->params.scale / output->params.scale);
    QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
                       &output_shift_identity);
    data->output_shift_identity = static_cast<int32_t>(output_shift_identity);
  }

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
  return CalculateOpDataLeakyRelu(context, node);
}

} // namespace tflite
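// Editor's note (not part of the original file): a worked example of the two
// QuantizeMultiplier() calls above, with made-up values. For
// input scale = 0.1, output scale = 0.2 and alpha = 0.25:
//   identity_multiplier = 0.1 / 0.2        = 0.5   -> m = 0.5, shift = 0
//   alpha_multiplier    = 0.1 * 0.25 / 0.2 = 0.125 -> m = 0.5, shift = -2
// where each value is normalized into [0.5, 1) times a power of two and m is
// stored as round(m * 2^31).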
@@ -1,44 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/logical.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace {

TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
  return LogicalImpl(context, node, LogicalOr);
}

TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
  return LogicalImpl(context, node, LogicalAnd);
}

} // namespace

TfLiteRegistration Register_LOGICAL_OR() {
  return tflite::micro::RegisterOp(nullptr, nullptr, LogicalOrEval);
}

TfLiteRegistration Register_LOGICAL_AND() {
  return tflite::micro::RegisterOp(nullptr, nullptr, LogicalAndEval);
}

} // namespace tflite
@@ -1,35 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

namespace tflite {
// Input/output tensor index.
extern const int kLogicalInputTensor1;
extern const int kLogicalInputTensor2;
extern const int kLogicalOutputTensor;

TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
                         bool (*func)(bool, bool));

bool LogicalOr(bool x, bool y);
bool LogicalAnd(bool x, bool y);

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
@@ -1,63 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logical.h"

namespace tflite {

// Input/output tensor index.
const int kLogicalInputTensor1 = 0;
const int kLogicalInputTensor2 = 1;
const int kLogicalOutputTensor = 0;

TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
                         bool (*func)(bool, bool)) {
  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kLogicalInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kLogicalInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kLogicalOutputTensor);

  if (tflite::micro::HaveSameShapes(input1, input2)) {
    reference_ops::BinaryFunction<bool, bool, bool>(
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<bool>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<bool>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<bool>(output), func);
  } else {
    reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<bool>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<bool>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<bool>(output), func);
  }

  return kTfLiteOk;
}

bool LogicalOr(bool x, bool y) { return x || y; }

bool LogicalAnd(bool x, bool y) { return x && y; }

} // namespace tflite
|
||||
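
// ---------------------------------------------------------------------------
// Editor's note: a standalone sketch, not part of the removed file. When the
// two inputs share a shape, LogicalImpl above applies `func` element-wise;
// otherwise it falls back to BroadcastBinaryFunction4DSlow. The plain loop
// below mirrors only the same-shape path, using the LogicalAnd defined above.
// ---------------------------------------------------------------------------
inline void ExampleSameShapeLogicalAnd() {
  const bool input1[4] = {true, true, false, false};
  const bool input2[4] = {true, false, true, false};
  bool output[4];
  for (int i = 0; i < 4; ++i) {
    output[i] = tflite::LogicalAnd(input1[i], input2[i]);
  }
  // output == {true, false, false, false}
}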
@@ -1,111 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {
namespace {

void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic));
}

TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kLogisticInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);

  if (input->type == kTfLiteFloat32) {
    switch (output->type) {
      case kTfLiteFloat32: {
        reference_ops::Logistic(tflite::micro::GetTensorShape(input),
                                tflite::micro::GetTensorData<float>(input),
                                tflite::micro::GetTensorShape(output),
                                tflite::micro::GetTensorData<float>(output));
        return kTfLiteOk;
      }
      default:
        MicroPrintf("Input %s, output %s not supported.",
                    TfLiteTypeGetName(input->type),
                    TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else if (input->type == kTfLiteInt16) {
    switch (output->type) {
      case kTfLiteInt16: {
        reference_integer_ops::Logistic(
            data->input_multiplier, data->input_left_shift,
            NumElements(input->dims),
            tflite::micro::GetTensorData<int16_t>(input),
            tflite::micro::GetTensorData<int16_t>(output));
        return kTfLiteOk;
      }
      default:
        MicroPrintf("Input %s, output %s not supported.",
                    TfLiteTypeGetName(input->type),
                    TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else if (input->type == kTfLiteInt8) {
    switch (output->type) {
      case kTfLiteInt8: {
        reference_integer_ops::Logistic(
            data->input_zero_point, data->input_range_radius,
            data->input_multiplier, data->input_left_shift,
            NumElements(input->dims),
            tflite::micro::GetTensorData<int8_t>(input),
            tflite::micro::GetTensorData<int8_t>(output));
        return kTfLiteOk;
      }
      default:
        MicroPrintf("Input %s, output %s not supported.",
                    TfLiteTypeGetName(input->type),
                    TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else {
    // TODO(b/141211002): Also support other data types once temporary tensors
    // are supported in TFLM.
    MicroPrintf("Input %s, output %s not supported.",
                TfLiteTypeGetName(input->type),
                TfLiteTypeGetName(output->type));
    return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_LOGISTIC() {
  return tflite::micro::RegisterOp(LogisticInit, LogisticPrepare, LogisticEval);
}
}  // namespace tflite
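
// ---------------------------------------------------------------------------
// Editor's note: a float-path sketch, not part of the removed file. The
// kTfLiteFloat32 branch above delegates to reference_ops::Logistic, which
// computes the standard logistic function; this <cmath>-only version shows
// the per-element math.
// ---------------------------------------------------------------------------
#include <cmath>

inline float LogisticReference(float x) {
  // logistic(x) = 1 / (1 + e^-x), mapping any real x into (0, 1).
  return 1.0f / (1.0f + std::exp(-x));
}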
@@ -1,42 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

namespace tflite {
extern const int kLogisticInputTensor;
extern const int kLogisticOutputTensor;

struct OpDataLogistic {
  int32_t input_zero_point;
  int32_t input_range_radius;
  int32_t input_multiplier;
  int input_left_shift;
};

TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
                                               TfLiteNode* node,
                                               OpDataLogistic* data);

TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node);

}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
@@ -1,119 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/logistic.h"

namespace tflite {
const int kLogisticInputTensor = 0;
const int kLogisticOutputTensor = 0;

TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
                                               TfLiteNode* node,
                                               OpDataLogistic* data) {
  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kLogisticInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kLogisticOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, output->params.zero_point,
                      std::numeric_limits<int8_t>::min());

    static constexpr int kInputIntegerBits = 4;
    const double input_real_multiplier =
        static_cast<double>(input->params.scale) *
        static_cast<double>(1 << (31 - kInputIntegerBits));

    data->input_zero_point = input->params.zero_point;

    const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
    data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));

    data->input_range_radius =
        CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
  }

  if (input->type == kTfLiteInt16) {
    static constexpr int kInputIntegerBits = 3;
    static constexpr int kOutputFractionalBits = 15;

    // See comments in TanhPrepare about requiring zero_point == 0
    // and a power-of-two ("POT") scale.

    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);

    int input_scale_log2_rounded;
    bool param_scale_pot =
        CheckedLog2(input->params.scale, &input_scale_log2_rounded);

    data->input_left_shift =
        (15 - kInputIntegerBits) + input_scale_log2_rounded;
    param_scale_pot &= (data->input_left_shift == 0);

    if (param_scale_pot) {
      data->input_multiplier = 0;
    } else {
      // Calculate multiplier to change input scale to 1/(3*4096)
      // as required by the table lookup.
      // In this scaling +/-2^17 represents +/-10.7
      double multiplier =
          static_cast<double>(input->params.scale) * 4096.0 * 3.0;

      data->input_left_shift = 0;

      while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
        data->input_left_shift++;
        multiplier = multiplier * 2.0;
      }

      data->input_multiplier = static_cast<int32_t>(multiplier);
    }

    int output_scale_log2_rounded;
    TF_LITE_ENSURE(
        context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
    TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
                      -kOutputFractionalBits);
  }

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);

  return CalculateArithmeticOpDataLogistic(context, node, data);
}

}  // namespace tflite
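
// ---------------------------------------------------------------------------
// Editor's note: a worked sketch of the int8 fixed-point setup above, not
// part of the removed file. The input scale (0.1) is illustrative.
// ---------------------------------------------------------------------------
#include <cmath>
#include <cstdint>

inline void ExampleLogisticInt8Multiplier() {
  const double input_scale = 0.1;   // hypothetical quantization scale
  const int kInputIntegerBits = 4;  // as in the int8 branch above
  // real_multiplier = 0.1 * 2^27 ~= 13421772.8
  const double real_multiplier =
      input_scale * static_cast<double>(1 << (31 - kInputIntegerBits));
  int left_shift = 0;
  // frexp returns q in [0.5, 1) with real_multiplier == q * 2^left_shift;
  // here q == 0.8 and left_shift == 24.
  const double q = std::frexp(real_multiplier, &left_shift);
  // Q31 multiplier: round(0.8 * 2^31) == 1717986918. The pair (multiplier,
  // left_shift) is the fixed-point representation of real_multiplier that the
  // integer kernel consumes.
  const int32_t multiplier = static_cast<int32_t>(std::round(q * (1ll << 31)));
  (void)multiplier;
}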
@@ -1,250 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_

#include <cstdint>
#include <memory>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

namespace tflite {

// Parameters for integer LSTM.
// Consider splitting this into two integer parameter structs if more fields
// are added.
struct IntegerLstmParameter {
  int32_t effective_input_to_input_scale_a;
  int32_t effective_input_to_input_scale_b;
  int32_t effective_recurrent_to_input_scale_a;
  int32_t effective_recurrent_to_input_scale_b;
  int32_t effective_cell_to_input_scale_a;
  int32_t effective_cell_to_input_scale_b;
  int32_t effective_input_to_forget_scale_a;
  int32_t effective_input_to_forget_scale_b;
  int32_t effective_recurrent_to_forget_scale_a;
  int32_t effective_recurrent_to_forget_scale_b;
  int32_t effective_cell_to_forget_scale_a;
  int32_t effective_cell_to_forget_scale_b;
  int32_t effective_input_to_cell_scale_a;
  int32_t effective_input_to_cell_scale_b;
  int32_t effective_recurrent_to_cell_scale_a;
  int32_t effective_recurrent_to_cell_scale_b;
  int32_t effective_input_to_output_scale_a;
  int32_t effective_input_to_output_scale_b;
  int32_t effective_recurrent_to_output_scale_a;
  int32_t effective_recurrent_to_output_scale_b;
  int32_t effective_cell_to_output_scale_a;
  int32_t effective_cell_to_output_scale_b;
  int32_t effective_proj_scale_a;
  int32_t effective_proj_scale_b;
  int32_t effective_hidden_scale_a;
  int32_t effective_hidden_scale_b;
  int32_t layer_norm_input_scale_a;
  int32_t layer_norm_input_scale_b;
  int32_t layer_norm_forget_scale_a;
  int32_t layer_norm_forget_scale_b;
  int32_t layer_norm_cell_scale_a;
  int32_t layer_norm_cell_scale_b;
  int32_t layer_norm_output_scale_a;
  int32_t layer_norm_output_scale_b;
  // Quantized clip value for cell and projection. Zero value means no
  // clipping.
  int16_t quantized_cell_clip;
  int8_t quantized_proj_clip;
  int32_t hidden_zp;
  int32_t cell_scale;

  int32_t input_variance_guard;
  int32_t forget_variance_guard;
  int32_t cell_variance_guard;
  int32_t output_variance_guard;

  // Pre-calculate bias + zero_point * weight.
  int32_t* input_to_forget_effective_bias;
  int32_t* recurrent_to_forget_effective_bias;
  int32_t* input_to_cell_effective_bias;
  int32_t* recurrent_to_cell_effective_bias;
  int32_t* input_to_output_effective_bias;
  int32_t* recurrent_to_output_effective_bias;
  int32_t* input_to_input_effective_bias;
  int32_t* recurrent_to_input_effective_bias;
  int32_t* projection_effective_bias;

  // Scale and zero point for intermediate tensors.
  // Used only in the 8x8_8 case.
  int32_t intermediate_scale_a[8];
  int32_t intermediate_scale_b[8];
  int32_t intermediate_zp[12];
};

// Scales for a hybrid op with integer inputs and float weights.
struct HybridLstmScales {
  float input_to_input_weights_scale;
  float input_to_forget_weights_scale;
  float input_to_cell_weights_scale;
  float input_to_output_weights_scale;
  float aux_input_to_input_weights_scale;
  float aux_input_to_forget_weights_scale;
  float aux_input_to_cell_weights_scale;
  float aux_input_to_output_weights_scale;
  float recurrent_to_input_weights_scale;
  float recurrent_to_forget_weights_scale;
  float recurrent_to_cell_weights_scale;
  float recurrent_to_output_weights_scale;
  float cell_to_input_weights_scale;
  float cell_to_forget_weights_scale;
  float cell_to_output_weights_scale;
  float projection_weights_scale;
};

TfLiteStatus EvalFloatLstm(
    const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* input_to_input_weights,
    const TfLiteEvalTensor* input_to_forget_weights,
    const TfLiteEvalTensor* input_to_cell_weights,
    const TfLiteEvalTensor* input_to_output_weights,
    const TfLiteEvalTensor* recurrent_to_input_weights,
    const TfLiteEvalTensor* recurrent_to_forget_weights,
    const TfLiteEvalTensor* recurrent_to_cell_weights,
    const TfLiteEvalTensor* recurrent_to_output_weights,
    const TfLiteEvalTensor* cell_to_input_weights,
    const TfLiteEvalTensor* cell_to_forget_weights,
    const TfLiteEvalTensor* cell_to_output_weights,
    const TfLiteEvalTensor* input_layer_norm_coefficients,
    const TfLiteEvalTensor* forget_layer_norm_coefficients,
    const TfLiteEvalTensor* cell_layer_norm_coefficients,
    const TfLiteEvalTensor* output_layer_norm_coefficients,
    const TfLiteEvalTensor* aux_input,
    const TfLiteEvalTensor* aux_input_to_input_weights,
    const TfLiteEvalTensor* aux_input_to_forget_weights,
    const TfLiteEvalTensor* aux_input_to_cell_weights,
    const TfLiteEvalTensor* aux_input_to_output_weights,
    const TfLiteEvalTensor* input_gate_bias,
    const TfLiteEvalTensor* forget_gate_bias,
    const TfLiteEvalTensor* cell_gate_bias,
    const TfLiteEvalTensor* output_gate_bias,
    const TfLiteEvalTensor* projection_weights,
    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
    bool forward_sequence, bool time_major, int output_offset,
    float* scratch_buffer, TfLiteEvalTensor* output_state,
    TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output);

TfLiteStatus EvalHybridLstm(
    const HybridLstmScales* hybrid_lstm_scales, const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* input_to_input_weights,
    const TfLiteEvalTensor* input_to_input_weights_ledger,
    const TfLiteEvalTensor* input_to_forget_weights,
    const TfLiteEvalTensor* input_to_forget_weights_ledger,
    const TfLiteEvalTensor* input_to_cell_weights,
    const TfLiteEvalTensor* input_to_cell_weights_ledger,
    const TfLiteEvalTensor* input_to_output_weights,
    const TfLiteEvalTensor* input_to_output_weights_ledger,
    const TfLiteEvalTensor* recurrent_to_input_weights,
    const TfLiteEvalTensor* recurrent_to_input_weights_ledger,
    const TfLiteEvalTensor* recurrent_to_forget_weights,
    const TfLiteEvalTensor* recurrent_to_forget_weights_ledger,
    const TfLiteEvalTensor* recurrent_to_cell_weights,
    const TfLiteEvalTensor* recurrent_to_cell_weights_ledger,
    const TfLiteEvalTensor* recurrent_to_output_weights,
    const TfLiteEvalTensor* recurrent_to_output_weights_ledger,
    const TfLiteEvalTensor* cell_to_input_weights,
    const TfLiteEvalTensor* cell_to_forget_weights,
    const TfLiteEvalTensor* cell_to_output_weights,
    const TfLiteEvalTensor* input_layer_norm_coefficients,
    const TfLiteEvalTensor* forget_layer_norm_coefficients,
    const TfLiteEvalTensor* cell_layer_norm_coefficients,
    const TfLiteEvalTensor* output_layer_norm_coefficients,
    const TfLiteEvalTensor* aux_input,
    const TfLiteEvalTensor* aux_input_to_input_weights,
    const TfLiteEvalTensor* aux_input_to_forget_weights,
    const TfLiteEvalTensor* aux_input_to_cell_weights,
    const TfLiteEvalTensor* aux_input_to_output_weights,
    const TfLiteEvalTensor* input_gate_bias,
    const TfLiteEvalTensor* forget_gate_bias,
    const TfLiteEvalTensor* cell_gate_bias,
    const TfLiteEvalTensor* output_gate_bias,
    const TfLiteEvalTensor* projection_weights,
    const TfLiteEvalTensor* projection_weights_ledger,
    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
    bool forward_sequence, bool time_major, int output_offset,
    float* scratch_buffer, float* input_sf, float* aux_input_sf,
    float* output_state_sf, float* prod_scaling_factors,
    float* recovered_cell_weights, int8_t* input_quantized,
    int8_t* aux_input_quantized, int8_t* output_state_quantized,
    int8_t* cell_state_quantized, float* scales, TfLiteEvalTensor* output_state,
    TfLiteEvalTensor* cell_state, int32_t* output_scratch_buffer,
    TfLiteEvalTensor* output, int32_t* input_zp, int32_t* aux_input_zp,
    int32_t* output_state_zp, int32_t* row_sums, int row_sums_size,
    bool* compute_row_sums);

TfLiteStatus EvalInteger8x8_16Lstm(
    const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* input_to_input_weights,
    const TfLiteEvalTensor* input_to_forget_weights,
    const TfLiteEvalTensor* input_to_cell_weights,
    const TfLiteEvalTensor* input_to_output_weights,
    const TfLiteEvalTensor* recurrent_to_input_weights,
    const TfLiteEvalTensor* recurrent_to_forget_weights,
    const TfLiteEvalTensor* recurrent_to_cell_weights,
    const TfLiteEvalTensor* recurrent_to_output_weights,
    const TfLiteEvalTensor* cell_to_input_weights,
    const TfLiteEvalTensor* cell_to_forget_weights,
    const TfLiteEvalTensor* cell_to_output_weights,
    const TfLiteEvalTensor* input_layer_norm_coefficients,
    const TfLiteEvalTensor* forget_layer_norm_coefficients,
    const TfLiteEvalTensor* cell_layer_norm_coefficients,
    const TfLiteEvalTensor* output_layer_norm_coefficients,
    const TfLiteEvalTensor* input_gate_bias,
    const TfLiteEvalTensor* forget_gate_bias,
    const TfLiteEvalTensor* cell_gate_bias,
    const TfLiteEvalTensor* output_gate_bias,
    const TfLiteEvalTensor* projection_weights,
    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
    bool forward_sequence, bool time_major,
    const IntegerLstmParameter* integer_lstm_param, int32_t output_state_zp,
    TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state,
    TfLiteEvalTensor* output, int16_t* scratch0, int16_t* scratch1,
    int16_t* scratch2, int16_t* scratch3, int8_t* scratch4, int32_t* scratch5);

TfLiteStatus EvalInteger8x8_8Lstm(
    const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* input_to_input_weights,
    const TfLiteEvalTensor* input_to_forget_weights,
    const TfLiteEvalTensor* input_to_cell_weights,
    const TfLiteEvalTensor* input_to_output_weights,
    const TfLiteEvalTensor* recurrent_to_input_weights,
    const TfLiteEvalTensor* recurrent_to_forget_weights,
    const TfLiteEvalTensor* recurrent_to_cell_weights,
    const TfLiteEvalTensor* recurrent_to_output_weights,
    const TfLiteEvalTensor* cell_to_input_weights,
    const TfLiteEvalTensor* cell_to_forget_weights,
    const TfLiteEvalTensor* cell_to_output_weights,
    const TfLiteEvalTensor* input_layer_norm_coefficients,
    const TfLiteEvalTensor* forget_layer_norm_coefficients,
    const TfLiteEvalTensor* cell_layer_norm_coefficients,
    const TfLiteEvalTensor* output_layer_norm_coefficients,
    const TfLiteEvalTensor* input_gate_bias,
    const TfLiteEvalTensor* forget_gate_bias,
    const TfLiteEvalTensor* cell_gate_bias,
    const TfLiteEvalTensor* output_gate_bias,
    const TfLiteEvalTensor* projection_weights,
    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
    TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state,
    TfLiteEvalTensor* output, const IntegerLstmParameter* integer_lstm_param,
    int8_t* scratch0, int8_t* scratch1, int16_t* scratch2, int16_t* scratch3,
    int16_t* scratch4, int16_t* scratch5, int16_t* scratch6, int16_t* scratch7);

}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_H_
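
// ---------------------------------------------------------------------------
// Editor's note: a hedged sketch, not part of the removed file, of how the
// effective_*_scale_a/_b pairs above are typically filled in. It assumes
// QuantizeMultiplier from kernels/internal/quantization_util.h; the tensor
// scales are illustrative. Each pair encodes
//   effective_scale ~= scale_a * 2^(scale_b - 31)
// with scale_a a Q31 multiplier and scale_b a power-of-two shift.
// ---------------------------------------------------------------------------
#include "tensorflow/lite/kernels/internal/quantization_util.h"

inline void ExampleEffectiveScale(tflite::IntegerLstmParameter* param) {
  const double input_scale = 0.05;                    // hypothetical
  const double weight_scale = 0.02;                   // hypothetical
  const double intermediate_scale = 1.0 / (1 << 12);  // hypothetical
  const double effective_scale =
      input_scale * weight_scale / intermediate_scale;
  int shift = 0;
  tflite::QuantizeMultiplier(effective_scale,
                             &param->effective_input_to_forget_scale_a, &shift);
  param->effective_input_to_forget_scale_b = shift;
}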
@@ -1,67 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_

namespace tflite {

// Input Tensors of size {n_batch, n_input}
constexpr int kLstmInputTensor = 0;

// Input weight tensors of size: {n_cell, n_input}
constexpr int kLstmInputToInputWeightsTensor = 1;  // Optional
constexpr int kLstmInputToForgetWeightsTensor = 2;
constexpr int kLstmInputToCellWeightsTensor = 3;
constexpr int kLstmInputToOutputWeightsTensor = 4;

// Recurrent weight tensors of size {n_cell, n_output}
constexpr int kLstmRecurrentToInputWeightsTensor = 5;  // Optional
constexpr int kLstmRecurrentToForgetWeightsTensor = 6;
constexpr int kLstmRecurrentToCellWeightsTensor = 7;
constexpr int kLstmRecurrentToOutputWeightsTensor = 8;

// Peephole weights tensors of size {n_cell}, representing a diagonal matrix.
constexpr int kLstmCellToInputWeightsTensor = 9;    // Optional
constexpr int kLstmCellToForgetWeightsTensor = 10;  // Optional
constexpr int kLstmCellToOutputWeightsTensor = 11;  // Optional

// Gates bias tensors of size {n_cell}
constexpr int kLstmInputGateBiasTensor = 12;  // Optional
constexpr int kLstmForgetGateBiasTensor = 13;
constexpr int kLstmCellGateBiasTensor = 14;
constexpr int kLstmOutputGateBiasTensor = 15;

// Projection weight tensor of size {n_output, n_cell}
constexpr int kLstmProjectionWeightsTensor = 16;  // Optional
// Projection bias tensor of size {n_output}
constexpr int kLstmProjectionBiasTensor = 17;  // Optional

// These state tensors are defined as variable tensors, and will be modified by
// this op.
constexpr int kLstmOutputStateTensor = 18;
constexpr int kLstmCellStateTensor = 19;

// Layer norm coefficient tensors of size {n_cell}, representing a diagonal
// matrix.
constexpr int kLstmInputLayerNormCoefficientsTensor = 20;   // Optional
constexpr int kLstmForgetLayerNormCoefficientsTensor = 21;  // Optional
constexpr int kLstmCellLayerNormCoefficientsTensor = 22;    // Optional
constexpr int kLstmOutputLayerNormCoefficientsTensor = 23;  // Optional

// Output tensors.
constexpr int kLstmOutputTensor = 0;

}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
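
// ---------------------------------------------------------------------------
// Editor's note: a usage sketch, not part of the removed file. Inside an LSTM
// kernel these constants index the node's inputs; GetEvalInput is the
// accessor used throughout this library, and optional tensors (e.g. the CIFG
// or peephole weights) may be absent at their slot.
// ---------------------------------------------------------------------------
#include "tensorflow/lite/micro/kernels/kernel_util.h"

inline const TfLiteEvalTensor* GetLstmActivationInput(TfLiteContext* context,
                                                      TfLiteNode* node) {
  // kLstmInputTensor == 0 selects the {n_batch, n_input} activation input.
  return tflite::micro::GetEvalInput(context, node, tflite::kLstmInputTensor);
}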
@@ -1,874 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This file and the associated .cc file are branched from
// tensorflow/lite/kernels/internal/reference/portable_tensor_utils*
// TFLM needs to create its own because the original files are coupled with
// the tensor_utils module, which we cannot reuse due to its use of the
// Eigen library.

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif

namespace tflite {

// Not all backends support CpuBackendContext usage, so forward declare to
// avoid pulling in its implementation.
// TODO(b/230666277): consider removing this since micro does not utilize it
class CpuBackendContext;

namespace micro_tensor_utils {

template <typename T>
inline bool PortableIsZeroVector(const T* vector, int v_size) {
  for (int i = 0; i < v_size; ++i) {
    if (vector[i] != 0) {
      return false;
    }
  }
  return true;
}

void PortableSymmetricQuantizeFloats(const float* values, const int size,
                                     int8_t* quantized_values, float* min_value,
                                     float* max_value, float* scaling_factor);

void PortableSymmetricQuantizeFloats(const float* values, const int size,
                                     int8_t* quantized_values, float min_value,
                                     float max_value, float* scaling_factor);

void PortableAsymmetricQuantizeFloats(const float* values, const int size,
                                      int8_t* quantized_values,
                                      float* scaling_factor, int32_t* offset);

// Multiply a matrix by a batch vector, and store results in a batch-size
// vector.
void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
                                                 int m_rows, int m_cols,
                                                 const float* vector,
                                                 int n_batch, float* result);

void PortableMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors, const float* scaling_factors,
    int n_batch, float* __restrict__ result);

void PortableMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors, const float* scaling_factors,
    int n_batch, float* __restrict__ result, const float* per_channel_scale,
    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
    bool* compute_row_sums, CpuBackendContext* context);

void PortableMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vector, const float* scaling_factors,
    int n_batch, int32_t* scratch, float* __restrict__ result,
    CpuBackendContext* context);

void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const float* __restrict__ vector, int n_batch, float* __restrict__ result);

void PortableSparseMatrixBatchVectorMultiplyAccumulate(
    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
    float* __restrict__ result);

void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
    const int32_t output_shift, const int32_t output_offset,
    const int32_t output_activation_min, const int32_t output_activation_max,
    int8_t* __restrict__ result);

void PortableSparseMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
    const int m_cols, const int8_t* __restrict__ vectors,
    const float* scaling_factors, int n_batch, float* __restrict__ result);

// Dot product of two vectors.
float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
                                     int v_size);

void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
                                              const int16_t* vector2,
                                              int v_size, int n_batch,
                                              int32_t* result);

void PortableVectorBatchVectorCwiseProductAccumulate(
    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
    int32_t multiplier, int shift, int16_t* result);

void PortableMatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int16_t* output, CpuBackendContext* context);

void PortableMatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int8_t* output, CpuBackendContext* context);

void PortableMatrixBatchVectorMultiply(const int8_t* input,
                                       int32_t input_zeropoint,
                                       const int8_t* input_to_gate_weights,
                                       int32_t input_to_gate_effective_scale_a,
                                       int32_t input_to_gate_effective_scale_b,
                                       int32_t n_batch, int32_t n_input,
                                       int32_t n_cell, int8_t* gate_output,
                                       int8_t gate_output_zp);

void PortableMatrixBatchVectorMultiply(
    const int16_t* hidden, const int8_t* hidden_to_output_weights,
    int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
    const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
    int32_t n_output, int32_t output_zp, int8_t* proj_output);

void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
                                            int32_t scalar, int32_t n_row,
                                            int32_t n_col, int32_t* output);

void PortableApplyLayerNorm(const int16_t* input,
                            const int16_t* layer_norm_weights,
                            const int32_t* bias, int32_t layer_norm_scale_a,
                            int32_t layer_norm_scale_b, int32_t variance_limit,
                            int n_batch, int n_input, int16_t* output);

void PortableApplyLayerNormFloat(const int16_t* input,
                                 const int16_t* layer_norm_weights,
                                 int32_t layer_norm_scale_a,
                                 int32_t layer_norm_scale_b,
                                 const int32_t* bias, int n_batch, int n_input,
                                 int16_t* output);

void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
                          int32_t n_input, int16_t* output);

void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
                               int32_t n_input, int16_t* output);

void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
                       int32_t n_batch, int32_t n_input, int16_t* output);

void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
                            int32_t n_input, int32_t integer_bits,
                            int16_t* output);

void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
                      int n_batch, int n_input, int shift, int16_t* output);

void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
                      int32_t multiplier, int32_t shift, int32_t n_batch,
                      int32_t n_input, int32_t output_zp, int8_t* output);

void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
                      int n_batch, int n_input, int16_t* output);

template <typename T>
inline void PortableCwiseClipping(T* vector, const int v_size,
                                  const T& clipping_value) {
  for (int i = 0; i < v_size; i++) {
    vector[i] = std::max(std::min(clipping_value, vector[i]),
                         static_cast<T>(-clipping_value));
  }
}

// Batch vector initialization with another vector.
void PortableVectorBatchVectorAssign(const float* vector, int v_size,
                                     int n_batch, float* batch_vector);

// Compute "1.0f - elements of vector" (used in CIFG).
void PortableSub1Vector(const float* vector, int v_size, float* result);

void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result);

// Multiply all elements of vector with a scalar.
void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                                  float* result);

// Reduce-sum on a vector:
// input_vector: pointer to input vector.
// output_vector: pointer to output vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
template <typename INPUT, typename OUTPUT>
inline void PortableReductionSumVector(const INPUT* input_vector,
                                       OUTPUT* output_vector, int output_size,
                                       int reduction_size) {
  for (int o = 0; o < output_size; o++) {
    OUTPUT result = 0;
    for (int r = 0; r < reduction_size; r++) {
      result += input_vector[r];
    }
    output_vector[o] = result;
    input_vector += reduction_size;
  }
}
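
// ---------------------------------------------------------------------------
// Editor's note: a worked sketch of the template above, not part of the
// removed file. Eight int8 inputs reduced in groups of four yield two int32
// sums.
// ---------------------------------------------------------------------------
inline void ExampleReductionSum() {
  const int8_t input[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  int32_t output[2] = {0, 0};
  // output[0] = 1 + 2 + 3 + 4 = 10; output[1] = 5 + 6 + 7 + 8 = 26.
  PortableReductionSumVector(input, output, /*output_size=*/2,
                             /*reduction_size=*/4);
}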

// Layer norm for each batch.
void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
                                     float* __restrict__ output_vector,
                                     int v_size, int n_batch);

// Saturate Add.
void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
                                  const int8_t* recurrent, int8_t recurrent_zp,
                                  int32_t input_effective_scale_a,
                                  int32_t input_effective_scale_b,
                                  int32_t recurrent_effective_scale_a,
                                  int32_t recurrent_effective_scale_b,
                                  int32_t n_batch, int32_t n_cell,
                                  int16_t* output);

// Add another vector for each batch in the batch vector.
template <typename T>
inline void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
                                 T* batch_vector) {
  for (int b = 0; b < n_batch; b++) {
    for (int i = 0; i < v_size; ++i) {
      batch_vector[i] += vector[i];
    }
    batch_vector += v_size;
  }
}

// Cwise product of two vectors.
template <typename T>
inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2,
                                     int v_size, T* result) {
  for (int v = 0; v < v_size; v++) {
    *result++ = *vector1++ * *vector2++;
  }
}

// Cwise product of a vector and a batch-vector.
template <typename T>
inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size,
                                          const T* batch_vector, int n_batch,
                                          T* result) {
  for (int b = 0; b < n_batch; b++) {
    VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
    // Update the pointers.
    result += v_size;
    batch_vector += v_size;
  }
}

// Reduce-sum on a float input vector:
// input_vector: float pointer to input vector.
// output_vector: float pointer to output vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
inline void ReductionSumVector(const float* input_vector, float* output_vector,
                               int output_size, int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

// Same as above but input/output is 32 bit integer.
inline void ReductionSumVector(const int32_t* input_vector,
                               int32_t* output_vector, int output_size,
                               int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

// Same as above but input is 8 bit integer.
inline void ReductionSumVector(const int8_t* input_vector,
                               int32_t* output_vector, int output_size,
                               int reduction_size) {
  PortableReductionSumVector(input_vector, output_vector, output_size,
                             reduction_size);
}

// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
// assumption here is that the result array is initialized to valid values.
template <typename T>
inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
                                               const T* __restrict__ vector2,
                                               int v_size,
                                               T* __restrict__ result) {
  for (int v = 0; v < v_size; v++) {
    *result++ += *vector1++ * *vector2++;
  }
}

// Batch vector initialization with another vector.
template <typename T>
inline void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
                                    T* batch_vector) {
  for (int b = 0; b < n_batch; b++) {
    std::copy_n(vector, v_size, batch_vector + b * v_size);
  }
}

inline void SymmetricQuantizeFloats(const float* values, const int size,
                                    int8_t* quantized_values, float* min,
                                    float* max, float* scaling_factor) {
  PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
                                  scaling_factor);
}

inline void SymmetricQuantizeFloats(const float* values, const int size,
                                    int8_t* quantized_values, float min_value,
                                    float max_value, float* scaling_factor) {
  PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
                                  max_value, scaling_factor);
}

inline void AsymmetricQuantizeFloats(const float* values, const int size,
                                     int8_t* quantized_values,
                                     float* scaling_factor, int32_t* offset) {
  PortableAsymmetricQuantizeFloats(values, size, quantized_values,
                                   scaling_factor, offset);
}

// Helper function to quantize floats.
// float_data_ptr     input float vectors
// n_batch            number of input vectors
// n_data             size of a single input vector
// quantized_data_ptr (out) vector with quantized data
// scaling_factors    (out) scaling factors (one per vector)
// zero_points        (out) zero points (one per vector)
// do_asymmetric      controls if the quantization should be asymmetric.
inline void BatchQuantizeFloats(const float* float_data_ptr, int n_batch,
                                int n_data, int8_t* quantized_data_ptr,
                                float* scaling_factors, int32_t* zero_points,
                                bool do_asymmetric) {
  for (int b = 0; b < n_batch; ++b) {
    const int offset = b * n_data;
    if (do_asymmetric) {
      AsymmetricQuantizeFloats(float_data_ptr + offset, n_data,
                               quantized_data_ptr + offset, &scaling_factors[b],
                               &zero_points[b]);
    } else {
      float unused_min, unused_max;
      SymmetricQuantizeFloats(float_data_ptr + offset, n_data,
                              quantized_data_ptr + offset, &unused_min,
                              &unused_max, &scaling_factors[b]);
    }
  }
}
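
// ---------------------------------------------------------------------------
// Editor's note: a sketch of the symmetric-quantization math behind the
// helpers above, not part of the removed file; the Portable* definitions live
// in the associated .cc file. For a vector with maximum magnitude max_abs,
// scaling_factor = max_abs / 127 and each value maps to
// round(x / scaling_factor), clamped to [-127, 127].
// ---------------------------------------------------------------------------
inline int8_t ExampleSymmetricQuantizeOne(float x, float max_abs) {
  const float scaling_factor = max_abs / 127.0f;
  const float q = std::round(x / scaling_factor);
  return static_cast<int8_t>(std::fmax(-127.0f, std::fmin(127.0f, q)));
}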

// Check if all entries of a vector are zero for float.
inline bool IsZeroVector(const float* vector, int v_size) {
  return PortableIsZeroVector(vector, v_size);
}

// Check if all entries of a vector are zero for int8_t.
inline bool IsZeroVector(const int8_t* vector, int v_size) {
  return PortableIsZeroVector(vector, v_size);
}

// Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized
// vector.
// Parameters:
//   - input: batch vector of size n_batch * n_input; 16 bit.
//   - layer_norm_weights: the quantized layer normalization weights.
//   - bias: the bias for the layer normalization.
//   - layer_norm_scale_a: multiplier for scale factor.
//   - layer_norm_scale_b: shift for scale factor.
//   - variance_limit: the guard to make sure the inverse does not overflow.
//   - n_batch: the number of batches.
//   - n_input: the size for input and output.
//   - output: the 16 bit output
inline void ApplyLayerNorm(const int16_t* input,
                           const int16_t* layer_norm_weights,
                           const int32_t* bias, int32_t layer_norm_scale_a,
                           int32_t layer_norm_scale_b, int32_t variance_limit,
                           int n_batch, int n_input, int16_t* output) {
  PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
                         layer_norm_scale_b, variance_limit, n_batch, n_input,
                         output);
}

// Same as above but the internal calculation is done in float.
inline void ApplyLayerNormFloat(const int16_t* input,
                                const int16_t* layer_norm_weights,
                                int32_t layer_norm_scale_a,
                                int32_t layer_norm_scale_b, const int32_t* bias,
                                int n_batch, int n_input, int16_t* output) {
  PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
                              layer_norm_scale_b, bias, n_batch, n_input,
                              output);
}

// Apply Sigmoid to a quantized vector.
// Parameters:
//   - input: batch vector of size n_batch * n_input; 16 bit.
//   - n_batch: the number of batches.
//   - n_input: the size for input and output.
//   - output: the 16 bit output
// The input is in Q3.12 format and the output is in Q0.15 format.
inline void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
                         int16_t* output) {
  PortableApplySigmoid(input, n_batch, n_input, output);
}

// Same as above but the internal calculation is float.
inline void ApplySigmoidFloat(const int16_t* input, int32_t n_batch,
                              int32_t n_input, int16_t* output) {
  PortableApplySigmoidFloat(input, n_batch, n_input, output);
}

// Apply Tanh to a quantized vector.
// Parameters:
//   - integer_bits: the integer bits of the input.
//                   Currently supports 0, 1, 2, 3, 4, 5, 6.
//   - input: batch vector of size n_batch * n_input; 16 bit.
//   - n_batch: the number of batches.
//   - n_input: the size for input and output.
//   - output: the 16 bit output
// The input is in Qm.15-m format and the output is in Q0.15 format.
inline void ApplyTanh(int32_t integer_bits, const int16_t* input,
                      int32_t n_batch, int32_t n_input, int16_t* output) {
  PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
}

// Apply Tanh to a quantized vector. The internal calculation is in float.
//   - Input has 2^(integer_bits) as scale.
//   - Output has Q0.15 as scale.
inline void ApplyTanhFloat(const int16_t* input, int32_t n_batch,
                           int32_t n_input, int32_t integer_bits,
                           int16_t* output) {
  PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
}

// Element-wise multiplication of two quantized vectors.
// Parameters:
//   - input_1: batch vector of size n_batch * n_input; 16 bit.
//   - input_2: batch vector of size n_batch * n_input; 16 bit.
//   - n_batch: the number of batches.
//   - n_input: the size for input and output.
//   - shift: the shift needed to produce the output.
//   - output: the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
inline void CwiseMul(const int16_t* input_1, const int16_t* input_2,
                     int n_batch, int n_input, int shift, int16_t* output) {
  PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
}

// Element-wise multiplication of two quantized vectors with rescaling.
// Parameters:
//   - input_1: batch vector of size n_batch * n_input; 16 bit.
//   - input_2: batch vector of size n_batch * n_input; 16 bit.
//   - multiplier: the multiplier part of scale.
//   - shift: the shift part of scale.
//   - n_batch: the number of batches.
//   - n_input: the size for input and output.
//   - output: the 8 bit output of size n_batch * n_input.
//   - output_zp: the zero point of output.
// Output does not need to be initialized.
// The multiplier ("m") and shift ("s") encode the output scale as
// scale = m * 2^(s - 31).
inline void CwiseMul(const int16_t* input_1, const int16_t* input_2,
                     int32_t multiplier, int32_t shift, int32_t n_batch,
                     int32_t n_input, int32_t output_zp, int8_t* output) {
  PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
                   output_zp, output);
}
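
// ---------------------------------------------------------------------------
// Editor's note: a worked sketch of the (multiplier, shift) encoding above,
// not part of the removed file. With m = 1717986918 (~0.8 * 2^31) and s = 1,
// scale = m * 2^(s - 31) = 0.8 * 2 = 1.6.
// ---------------------------------------------------------------------------
inline double ExampleCwiseMulScale() {
  const int32_t multiplier = 1717986918;  // ~0.8 in Q31
  const int32_t shift = 1;
  return static_cast<double>(multiplier) * std::ldexp(1.0, shift - 31);
}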

// Element-wise in-place clipping of a vector. Overloaded for float, int16_t,
// int8_t. Parameters:
//   - vector: vector of size v_size.
//   - v_size: the size of the vector.
//   - clipping_value: the value used for clipping.
inline void CwiseClipping(float* vector, const int v_size,
                          const float clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

inline void CwiseClipping(int16_t* vector, const int v_size,
                          const int16_t clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

inline void CwiseClipping(int8_t* vector, const int v_size,
                          const int8_t clipping_value) {
  PortableCwiseClipping(vector, v_size, clipping_value);
}

// Element-wise saturating addition of two quantized vectors without rescaling.
// Parameters:
//   - input_1: batch vector of size n_batch * n_input; 16 bit.
//   - input_2: batch vector of size n_batch * n_input; 16 bit.
//   - n_batch: the number of batches.
//   - n_input: the size for input and output.
//   - output: the 16 bit output of size n_batch * n_input.
// Output does not need to be initialized.
inline void CwiseAdd(const int16_t* input_1, const int16_t* input_2,
                     int n_batch, int n_input, int16_t* output) {
  PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
}

inline void MeanStddevNormalization(const float* input_vector,
                                    float* output_vector, int v_size,
                                    int n_batch) {
  PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
}

inline void Sub1Vector(const float* vector, int v_size, float* result) {
  PortableSub1Vector(vector, v_size, result);
}

inline void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
  PortableSub1Vector(vector, v_size, result);
}

// Multiply all elements of vector with a scalar.
inline void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
                                 float* result) {
  PortableVectorScalarMultiply(vector, v_size, scale, result);
}

// Saturate Add with rescale on both inputs.
inline void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
                                 const int8_t* recurrent, int8_t recurrent_zp,
                                 int32_t input_effective_scale_a,
                                 int32_t input_effective_scale_b,
                                 int32_t recurrent_effective_scale_a,
                                 int32_t recurrent_effective_scale_b,
                                 int32_t n_batch, int32_t n_cell,
                                 int16_t* output) {
  PortableTwoGateSaturatingAdd(
      input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
      input_effective_scale_b, recurrent_effective_scale_a,
      recurrent_effective_scale_b, n_batch, n_cell, output);
}

// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
// dimension composed by input vectors independent from each other). The result
// of the multiplication is accumulated to the passed result buffer.
// More specifically, for a matrix M of shape [n, i] and a batched-vector
// of shape [i, batch] it will first compute the product of shape [n, batch].
// This product will be accumulated to the result buffer.
inline void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
                                                int m_cols, const float* vector,
                                                int n_batch, float* result) {
  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
                                              n_batch, result);
}
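
// ---------------------------------------------------------------------------
// Editor's note: a worked sketch of the accumulate semantics above, not part
// of the removed file. A [2, 3] matrix times a single batch of a 3-vector
// adds a 2-vector into `result`, which must already hold valid values.
// ---------------------------------------------------------------------------
inline void ExampleMatrixBatchVectorMultiplyAccumulate() {
  const float matrix[6] = {1.0f, 2.0f, 3.0f,   // row 0
                           4.0f, 5.0f, 6.0f};  // row 1
  const float vector[3] = {1.0f, 0.0f, 1.0f};  // n_batch == 1
  float result[2] = {10.0f, 20.0f};            // accumulated into
  MatrixBatchVectorMultiplyAccumulate(matrix, /*m_rows=*/2, /*m_cols=*/3,
                                      vector, /*n_batch=*/1, result);
  // result == {10 + (1 + 3), 20 + (4 + 6)} == {14, 30}.
}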
|
||||
// Same as the function above, but the matrix is a sparse tensor with block
|
||||
// pattern 1x4.
|
||||
// This function assumes that m_cols is a multiple of the block size (4 in this
|
||||
// case) so that there's no incomplete block.
|
||||
inline void MatrixBatchVectorMultiplyAccumulate(
|
||||
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
|
||||
const int8_t* __restrict__ vector, const float* scaling_factors,
|
||||
int n_batch, float* __restrict__ result) {
|
||||
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
|
||||
scaling_factors, n_batch, result);
|
||||
}
|
||||
|
||||
inline void MatrixBatchVectorMultiplyAccumulate(
|
||||
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
|
||||
const int8_t* __restrict__ vectors, const float* scaling_factors,
|
||||
int n_batch, float* __restrict__ result, const float* per_channel_scale,
|
||||
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
|
||||
bool* compute_row_sums, CpuBackendContext* context) {
|
||||
PortableMatrixBatchVectorMultiplyAccumulate(
|
||||
matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
|
||||
per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
|
||||
context);
|
||||
}
|
||||
|
||||
inline void MatrixBatchVectorMultiplyAccumulate(
|
||||
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
|
||||
const int8_t* __restrict__ vector, const float* scaling_factors,
|
||||
int n_batch, int32_t* scratch, float* __restrict__ result,
|
||||
CpuBackendContext* context) {
|
||||
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
|
||||
scaling_factors, n_batch, result);
|
||||
}
|
||||
|
||||
// Same as the function above, but the matrix is a sparse tensor with block
|
||||
// pattern 1x4.
|
||||
// This function assumes that m_cols is a multiple of the block size (4 in this
|
||||
// case) so that there's no incomplete block.
|
||||
inline void SparseMatrixBatchVectorMultiplyAccumulate1x4(
|
||||
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
|
||||
const int32_t* __restrict__ indices, int m_rows, int m_cols,
|
||||
const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
|
||||
PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
|
||||
matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
|
||||
}
|
||||
|
||||
// Same as the function above, but the matrix is stored in block compressed
|
||||
// sparse row format with block pattern 1x16 which consists of two arrays:
|
||||
// 1. A matrix array stores non-zero blocks of the matrix in row major.
|
||||
// 2. A ledger array stores nrows groups, one group per row. Each group starts
|
||||
// with an integer representing the number of non-zero blocks for the
|
||||
// corresponding row and follows with column indexes of the first element
|
||||
// of each non-zero block.
|
||||
// This function assumes that
|
||||
// 1. m_cols is a multiple of 16 so that all blocks are full blocks.
|
||||
// 2. m_cols < 254 * 16 so that block index can be represented by uint8.
|
||||
inline void SparseMatrixBatchVectorMultiplyAccumulate(
|
||||
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
|
||||
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
|
||||
float* __restrict__ result) {
|
||||
PortableSparseMatrixBatchVectorMultiplyAccumulate(
|
||||
matrix, ledger, m_rows, m_cols, vector, n_batch, result);
|
||||
}
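
// Illustrative ledger layout (editor addition; a hypothetical example): for
// m_cols == 32, i.e. two 1x16 blocks per row, a 2-row matrix where row 0
// keeps only its second block and row 1 is fully dense would use
//   ledger = {1, 1,      // row 0: 1 non-zero block, at block column 1
//             2, 0, 1};  // row 1: 2 non-zero blocks, at block columns 0, 1
// with the surviving 16-wide blocks stored back to back in `matrix` in the
// same order.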

// Same as the function above, but the matrix is a sparse tensor with block
// pattern 1x16.
// This function assumes that m_cols is a multiple of the block size (16 in
// this case) so that there's no incomplete block. It also assumes that all
// offsets of the input, output and filter are zero.
inline void SparseMatrixBatchVectorMultiplyAccumulate1x16(
    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
    const int32_t* __restrict__ indices, int m_rows, int m_cols,
    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
    const int32_t output_shift, const int32_t output_offset,
    const int32_t output_activation_min, const int32_t output_activation_max,
    int8_t* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
      matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch,
      input_offset, output_multiplier, output_shift, output_offset,
      output_activation_min, output_activation_max, result);
}

// Same as the function above, but the matrix is stored in block compressed
// sparse row format with block pattern 1x16, which consists of two arrays:
//   1. A matrix array that stores the non-zero blocks of the matrix in row
//      major order.
//   2. A ledger array that stores nrows groups, one group per row. Each group
//      starts with an integer giving the number of non-zero blocks for the
//      corresponding row, followed by the block-column index of the first
//      element of each non-zero block.
// This function assumes that
//   1. m_cols is a multiple of 16, so that all blocks are full blocks.
//   2. m_cols < 254 * 16, so that a block index fits in a uint8.
inline void SparseMatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
    const int m_cols, const int8_t* __restrict__ vectors,
    const float* scaling_factors, int n_batch, float* __restrict__ result) {
  PortableSparseMatrixBatchVectorMultiplyAccumulate(
      matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
      result);
}

// Same as the above 8, 8, 8 integer matmul, except that it applies a zero
// point and is non-accumulative.
// TODO(b/148688698): remove this function by folding the zero point
// calculation into the prepare() function.
inline void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
      n_output, output_zp, scratch, output, context);
}

// Same as above, but with 16 bit and 8 bit inputs and an 8 bit output.
// Used in the projection when the hidden state is 16 bit.
inline void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* input, const int32_t* bias,
    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
  PortableMatrixBatchVectorMultiplyAccumulate(
      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
      n_output, output_zp, scratch, output, context);
}

// Same as the function above, but provides a separate scaling factor for the
// matrix and the vectors. The two scaling factors are multiplied together in
// the scaling_factor_scratch buffer.
inline void MatrixBatchVectorMultiplyAccumulate(
    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
    const int8_t* __restrict__ vectors, const float matrix_scaling_factor,
    const float* vector_scaling_factors, int n_batch,
    float* __restrict__ result, const float* per_channel_scale,
    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
    bool* compute_row_sums, float* scaling_factor_scratch,
    CpuBackendContext* context) {
  for (int b = 0; b < n_batch; ++b) {
    scaling_factor_scratch[b] =
        vector_scaling_factors[b] * matrix_scaling_factor;
  }
  MatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vectors,
                                      scaling_factor_scratch, n_batch, result,
                                      per_channel_scale, input_offset, scratch,
                                      row_sums, compute_row_sums, context);
}

// Multiplies a matrix with a scalar and reduces each row of the result to a
// scalar.
// Parameters:
//  - matrix: matrix of size n_row * n_col
//  - scalar: the scalar that each element in the matrix is multiplied with
//  - n_row: the row count of the matrix
//  - n_col: the column count of the matrix
//  - output: the 32 bit output
// Note: We do not need saturation, because each int8 * int8 product is safe
// from overflow up to (2^31 - 1) / 2^14 = 131071 accumulations, which is
// bigger than n_col, the per-row reduction length; a non-zero initial output
// value is assumed not to be exceptionally large.
inline void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
                                           int32_t n_row, int32_t n_col,
                                           int32_t* output) {
  PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
}
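
// Worked bound (editor addition): each int8 * int8 product is at most
// 2^7 * 2^7 = 2^14 in magnitude, so even n_col = 2048 accumulates at most
// 2048 * 2^14 = 2^25 per row, comfortably inside the int32 range and far
// below the (2^31 - 1) / 2^14 = 131071 product limit noted above.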

// Same as the above 8, 8, 8 integer matmul, except that it applies a zero
// point and is non-accumulative.
// TODO(b/148688698): remove this function by folding the zero point
// calculation into the prepare() function.
inline void MatrixBatchVectorMultiply(const int8_t* input,
                                      int32_t input_zeropoint,
                                      const int8_t* input_to_gate_weights,
                                      int32_t input_to_gate_effective_scale_a,
                                      int32_t input_to_gate_effective_scale_b,
                                      int32_t n_batch, int32_t n_input,
                                      int32_t n_cell, int8_t* gate_output,
                                      int8_t gate_output_zp) {
  PortableMatrixBatchVectorMultiply(
      input, input_zeropoint, input_to_gate_weights,
      input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
      n_input, n_cell, gate_output, gate_output_zp);
}

// Same as above, but with 16 bit and 8 bit inputs and an 8 bit output.
// Used in the projection when the hidden state is 16 bit.
inline void MatrixBatchVectorMultiply(const int16_t* hidden,
                                      const int8_t* hidden_to_output_weights,
                                      int32_t proj_effective_scale_a,
                                      int32_t proj_effective_scale_b,
                                      const int32_t* gate_bias, int32_t n_batch,
                                      int32_t n_hidden, int32_t n_output,
                                      int32_t output_zp, int8_t* proj_output) {
  PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
                                    proj_effective_scale_a,
                                    proj_effective_scale_b, gate_bias, n_batch,
                                    n_hidden, n_output, output_zp, proj_output);
}

// Cwise product and accumulate of a vector and a batch-vector. Since it's a
// MAC operation, the assumption here is that the result array is initialized
// to valid values.
template <typename T>
inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size,
                                                    const T* batch_vector,
                                                    int n_batch, T* result) {
  for (int b = 0; b < n_batch; b++) {
    VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
    // Update the pointers.
    result += v_size;
    batch_vector += v_size;
  }
}
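
// Illustrative usage (editor addition; the values are made up): with
// vector == {1, 2}, a single batch {3, 4} and result initialized to
// {10, 10}, the call leaves result == {13, 18}: the element-wise products
// are accumulated on top of the existing values.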

// Same as above, but the inputs are 16 bit integers and the output is a
// 16 bit integer.
inline void VectorBatchVectorCwiseProductAccumulate(
    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
    int32_t multiplier, int shift, int16_t* result) {
  PortableVectorBatchVectorCwiseProductAccumulate(
      vector, v_size, batch_vector, n_batch, multiplier, shift, result);
}

// Applies the rectified linear unit (ReLU) to the elements of a vector.
inline void ApplyReluToVector(const float* vector, int v_size, float* result) {
  for (int v = 0; v < v_size; v++) {
    result[v] = std::max(0.0f, vector[v]);
  }
}

// Applies ReLU1 (clamp to [-1, 1]) to the elements of a vector.
inline void ApplyRelu1ToVector(const float* vector, int v_size, float* result) {
  for (int v = 0; v < v_size; v++) {
    result[v] = std::max(-1.0f, std::min(vector[v], 1.0f));
  }
}

// Applies ReLU6 (clamp to [0, 6]) to the elements of a vector.
inline void ApplyRelu6ToVector(const float* vector, int v_size, float* result) {
  for (int v = 0; v < v_size; v++) {
    result[v] = std::max(0.0f, std::min(vector[v], 6.0f));
  }
}

// Applies tanh to the elements of a vector.
inline void ApplyTanhToVector(const float* vector, int v_size, float* result) {
  for (int v = 0; v < v_size; v++) {
    result[v] = std::tanh(vector[v]);
  }
}

// Applies signbit to the elements of a vector.
inline void ApplySignbitToVector(const float* vector, int v_size,
                                 float* result) {
  for (int v = 0; v < v_size; v++) {
    result[v] = std::signbit(vector[v]);
  }
}

// Applies the sigmoid to the elements of a vector.
inline void ApplySigmoidToVector(const float* vector, int v_size,
                                 float* result) {
  for (int v = 0; v < v_size; v++) {
    result[v] = 1.0f / (1.0f + std::exp(-vector[v]));
  }
}

// Applies the appropriate fused activation function to the elements of a
// vector.
inline void ApplyActivationToVector(const float* vector, int v_size,
                                    TfLiteFusedActivation activation,
                                    float* result) {
  switch (activation) {
    case kTfLiteActNone:
      return;
    case kTfLiteActRelu:
      return ApplyReluToVector(vector, v_size, result);
    case kTfLiteActReluN1To1:
      return ApplyRelu1ToVector(vector, v_size, result);
    case kTfLiteActRelu6:
      return ApplyRelu6ToVector(vector, v_size, result);
    case kTfLiteActTanh:
      return ApplyTanhToVector(vector, v_size, result);
    case kTfLiteActSignBit:
      return ApplySignbitToVector(vector, v_size, result);
    case kTfLiteActSigmoid:
      return ApplySigmoidToVector(vector, v_size, result);
  }
}
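
// Illustrative usage (editor addition): in-place application is safe here
// because each output element depends only on the matching input element.
//
//   float v[4] = {-2.0f, -0.5f, 0.5f, 2.0f};
//   ApplyActivationToVector(v, 4, kTfLiteActRelu, v);
//   // v == {0.0f, 0.0f, 0.5f, 2.0f}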

}  // namespace micro_tensor_utils

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_
@@ -1,40 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
namespace tflite {
namespace ops {
namespace micro {

// Same as gtl::Greater, but defined here to reduce dependencies and
// binary size for the micro environment.
struct Greater {
  template <typename T>
  bool operator()(const T& x, const T& y) const {
    return x > y;
  }
};

struct Less {
  template <typename T>
  bool operator()(const T& x, const T& y) const {
    return x < y;
  }
};

}  // namespace micro
}  // namespace ops
}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
@@ -1,215 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace {

struct OpDataMirrorPad {
  int input_dims;
  int output_size;
  int offset;
  int output_dims_num_elements_buffer_index;
  int input_dims_num_elements_buffer_index;
};

// Helper method that fills in the left and right pads for one dimension.
template <typename T>
inline void GetPadding(const T* data, int offset, int64_t* left_pad,
                       int64_t* right_pad) {
  *left_pad = static_cast<int64_t>(*(data + offset * 2));
  *right_pad = static_cast<int64_t>(*(data + offset * 2 + 1));
}

// Given a dimension index in the padded output and the left/right padding,
// returns the corresponding dimension index in the input array.
inline int GetInputDimension(int padded_dimension, int left_pad, int right_pad,
                             int input_dim_size, int offset) {
  if (padded_dimension < left_pad) {
    const int original_ind = left_pad + offset - 1;
    return original_ind - (std::min(padded_dimension, original_ind - offset));
  }
  padded_dimension -= left_pad;
  if (padded_dimension >= input_dim_size) {
    padded_dimension -= input_dim_size;
    const int original_ind = input_dim_size - (1 + offset);
    return original_ind - std::min(padded_dimension, original_ind);
  }
  return padded_dimension;
}
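
// Worked example (editor addition): in reflect mode (offset == 1), an input
// row of size 5 with left_pad == 2 and right_pad == 2 maps padded positions
// to input indices as
//   padded: 0 1 | 2 3 4 5 6 | 7 8
//   input:  2 1 | 0 1 2 3 4 | 3 2
// while symmetric mode (offset == 0) repeats the edge elements:
//   input:  1 0 | 0 1 2 3 4 | 4 3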

// Given an index into the output array, returns the flat index of the
// corresponding value in the input array.
int GetFlatIndex(int index, int num_dims,
                 const TfLiteEvalTensor* padding_matrix,
                 const TfLiteIntArray* input_dims,
                 int* output_dims_num_elements, int* input_dims_num_elements,
                 const int offset) {
  int flat_index = 0;
  int64_t left_pad = 0, right_pad = 0, dimension_index, index_in_input;

  for (int i = 0; i < num_dims; ++i) {
    switch (padding_matrix->type) {
      case kTfLiteInt32:
        GetPadding(padding_matrix->data.i32, i, &left_pad, &right_pad);
        break;
      case kTfLiteInt64:
        GetPadding(padding_matrix->data.i64, i, &left_pad, &right_pad);
        break;
      default:
        break;
    }
    dimension_index = index / output_dims_num_elements[i];

    index_in_input = GetInputDimension(dimension_index, left_pad, right_pad,
                                       input_dims->data[i], offset);

    flat_index += index_in_input * input_dims_num_elements[i];
    index %= output_dims_num_elements[i];
  }

  return flat_index;
}

template <typename T>
void MirrorPad(const TfLiteEvalTensor* padding_matrix,
               const TfLiteIntArray* input_dims, int* output_dims_num_elements,
               int* input_dims_num_elements, const T* input_data,
               T* output_data, const int offset, const int num_dims,
               const int output_size) {
  for (int i = 0; i < output_size; ++i) {
    output_data[i] = input_data[GetFlatIndex(
        i, num_dims, padding_matrix, input_dims, output_dims_num_elements,
        input_dims_num_elements, offset)];
  }
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TfLiteStatus status = kTfLiteOk;
  const OpDataMirrorPad* data =
      static_cast<const OpDataMirrorPad*>(node->user_data);

  const TfLiteEvalTensor* input_tensor =
      tflite::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor* padding_matrix =
      tflite::micro::GetEvalInput(context, node, 1);

  TfLiteEvalTensor* output_tensor =
      tflite::micro::GetEvalOutput(context, node, 0);
  const int input_dims = data->input_dims;
  const int output_size = data->output_size;

  int* input_dims_num_elements = (int*)context->GetScratchBuffer(
      context, data->input_dims_num_elements_buffer_index);
  int* output_dims_num_elements = (int*)context->GetScratchBuffer(
      context, data->output_dims_num_elements_buffer_index);

  for (int i = 0; i < input_dims; i++) {
    output_dims_num_elements[i] = 1;
    input_dims_num_elements[i] = 1;
  }
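
  // The loop below turns the dimension sizes into suffix products:
  // *_dims_num_elements[i] becomes the flat stride of dimension i, which is
  // how GetFlatIndex consumes these arrays (editor comment, added for
  // clarity).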
  for (int i = input_dims - 2; i >= 0; i--) {
    output_dims_num_elements[i] =
        output_dims_num_elements[i + 1] * output_tensor->dims->data[i + 1];

    input_dims_num_elements[i] =
        input_dims_num_elements[i + 1] * input_tensor->dims->data[i + 1];
  }

  switch (output_tensor->type) {
    case kTfLiteFloat32: {
      MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
                input_dims_num_elements,
                tflite::micro::GetTensorData<float>(input_tensor),
                tflite::micro::GetTensorData<float>(output_tensor),
                data->offset, input_dims, output_size);
      break;
    }
    case kTfLiteInt8: {
      MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
                input_dims_num_elements,
                tflite::micro::GetTensorData<int8_t>(input_tensor),
                tflite::micro::GetTensorData<int8_t>(output_tensor),
                data->offset, input_dims, output_size);
      break;
    }
    default:
      status = kTfLiteError;
      break;
  }

  return status;
}

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataMirrorPad));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpDataMirrorPad* data = static_cast<OpDataMirrorPad*>(node->user_data);

  TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0);
  TfLiteTensor* padding_matrix =
      micro_context->AllocateTempInputTensor(node, 1);
  TfLiteTensor* output_tensor =
      micro_context->AllocateTempOutputTensor(node, 0);

  TF_LITE_ENSURE_EQ(context, NumDimensions(padding_matrix), 2);
  TF_LITE_ENSURE_EQ(context, SizeOfDimension(padding_matrix, 0),
                    NumDimensions(input_tensor));
  auto* params =
      reinterpret_cast<TfLiteMirrorPaddingParams*>(node->builtin_data);
  if (params == nullptr) {
    return kTfLiteError;
  }

  data->offset =
      params->mode != TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect ? 0
                                                                           : 1;
  data->input_dims = NumDimensions(input_tensor);
  data->output_size = NumElements(output_tensor);

  TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
      context, data->input_dims * sizeof(int),
      &data->output_dims_num_elements_buffer_index));
  TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
      context, data->input_dims * sizeof(int),
      &data->input_dims_num_elements_buffer_index));

  micro_context->DeallocateTempTfLiteTensor(input_tensor);
  micro_context->DeallocateTempTfLiteTensor(padding_matrix);
  micro_context->DeallocateTempTfLiteTensor(output_tensor);
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_MIRROR_PAD() {
  return tflite::micro::RegisterOp(Init, Prepare, Eval);
}

}  // namespace tflite
@@ -1,67 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/mul.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {

TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);

  switch (input1->type) {
    case kTfLiteInt8:
    case kTfLiteInt32:
      EvalMulQuantizedReference(context, node, data, input1, input2, output);
      break;
    case kTfLiteFloat32:
      EvalMulFloatReference(context, node, params, data, input1, input2,
                            output);
      break;
    default:
      MicroPrintf("Type %s (%d) not supported.",
                  TfLiteTypeGetName(input1->type), input1->type);
      return kTfLiteError;
  }

  return kTfLiteOk;
}

TfLiteRegistration Register_MUL() {
  return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval);
}

}  // namespace tflite
@@ -1,184 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
#include "tensorflow/lite/kernels/internal/reference/mul.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/mul.h"
#include "tensorflow/lite/micro/memory_helpers.h"

namespace tflite {

const int kMulInput1Tensor = 0;
const int kMulInput2Tensor = 1;
const int kMulOutputTensor = 0;

void* MulInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataMul));
}

TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
                                TfLiteMulParams* params, OpDataMul* data) {
  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input1 =
      micro_context->AllocateTempInputTensor(node, kMulInput1Tensor);
  TF_LITE_ENSURE(context, input1 != nullptr);
  TfLiteTensor* input2 =
      micro_context->AllocateTempInputTensor(node, kMulInput2Tensor);
  TF_LITE_ENSURE(context, input2 != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kMulOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);

  if (output->type == kTfLiteInt8) {
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));

    double real_multiplier = static_cast<double>(input1->params.scale) *
                             static_cast<double>(input2->params.scale) /
                             static_cast<double>(output->params.scale);
    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                       &data->output_shift);

    data->input1_zero_point = input1->params.zero_point;
    data->input2_zero_point = input2->params.zero_point;
    data->output_zero_point = output->params.zero_point;
  } else if (output->type == kTfLiteInt32) {
    CalculateActivationRange(params->activation, &data->output_activation_min,
                             &data->output_activation_max);
  } else {
    CalculateActivationRange(params->activation,
                             &data->output_activation_min_f32,
                             &data->output_activation_max_f32);
  }

  micro_context->DeallocateTempTfLiteTensor(input1);
  micro_context->DeallocateTempTfLiteTensor(input2);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}
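
// Worked example (editor addition; the scales are hypothetical): with input
// scales 0.5 and 0.25 and an output scale of 1.0,
// real_multiplier = 0.5 * 0.25 / 1.0 = 0.125, which QuantizeMultiplier
// stores as a Q31 mantissa of 0.5 (1 << 30) and shift == -2, since
// 0.5 * 2^-2 == 0.125.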

TfLiteStatus MulPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpDataMul* data = static_cast<OpDataMul*>(node->user_data);

  return CalculateOpDataMul(context, node, params, data);
}

void EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node,
                               const OpDataMul* data,
                               const TfLiteEvalTensor* input1,
                               const TfLiteEvalTensor* input2,
                               TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params = {};
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  op_params.float_activation_max = data->output_activation_max_f32;
  op_params.input1_offset = -data->input1_zero_point;
  op_params.input2_offset = -data->input2_zero_point;
  op_params.output_offset = data->output_zero_point;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = data->output_shift;

  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  if (input1->type == kTfLiteInt8) {
    if (need_broadcast) {
      reference_integer_ops::BroadcastMul4DSlow(
          op_params, tflite::micro::GetTensorShape(input1),
          tflite::micro::GetTensorData<int8_t>(input1),
          tflite::micro::GetTensorShape(input2),
          tflite::micro::GetTensorData<int8_t>(input2),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int8_t>(output));
    } else {
      reference_integer_ops::Mul(op_params,
                                 tflite::micro::GetTensorShape(input1),
                                 tflite::micro::GetTensorData<int8_t>(input1),
                                 tflite::micro::GetTensorShape(input2),
                                 tflite::micro::GetTensorData<int8_t>(input2),
                                 tflite::micro::GetTensorShape(output),
                                 tflite::micro::GetTensorData<int8_t>(output));
    }
  } else if (input1->type == kTfLiteInt32) {
    if (need_broadcast) {
      reference_ops::BroadcastMul4DSlow(
          op_params, tflite::micro::GetTensorShape(input1),
          tflite::micro::GetTensorData<int32_t>(input1),
          tflite::micro::GetTensorShape(input2),
          tflite::micro::GetTensorData<int32_t>(input2),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int32_t>(output));
    } else {
      reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
                         tflite::micro::GetTensorData<int32_t>(input1),
                         tflite::micro::GetTensorShape(input2),
                         tflite::micro::GetTensorData<int32_t>(input2),
                         tflite::micro::GetTensorShape(output),
                         tflite::micro::GetTensorData<int32_t>(output));
    }
  }
}

void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node,
                           TfLiteMulParams* params, const OpDataMul* data,
                           const TfLiteEvalTensor* input1,
                           const TfLiteEvalTensor* input2,
                           TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params = {};
  op_params.float_activation_min = data->output_activation_min_f32;
  op_params.float_activation_max = data->output_activation_max_f32;

  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  if (need_broadcast) {
    reference_ops::BroadcastMul4DSlow(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
  } else {
    reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
                       tflite::micro::GetTensorData<float>(input1),
                       tflite::micro::GetTensorShape(input2),
                       tflite::micro::GetTensorData<float>(input2),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));
  }
}

}  // namespace tflite
@@ -1,170 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
#include "tensorflow/lite/kernels/internal/reference/pooling.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/pooling.h"

namespace tflite {

const int kPoolingInputTensor = 0;
const int kPoolingOutputTensor = 0;

TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
                                    const TfLitePoolParams* params,
                                    const TfLiteTensor* input,
                                    const TfLiteTensor* output,
                                    OpDataPooling* data) {
  // input: batch, height, width, channel
  int height = SizeOfDimension(input, 1);
  int width = SizeOfDimension(input, 2);

  int out_height, out_width;

  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      /*dilation_rate_height=*/1,
      /*dilation_rate_width=*/1, height, width, params->filter_height,
      params->filter_width, params->padding, &out_height, &out_width);

  return kTfLiteOk;
}
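
// Editor's note (standard TF padding arithmetic, stated for orientation):
// SAME padding yields out = ceil(in / stride) and VALID padding yields
// out = floor((in - filter) / stride) + 1; ComputePaddingHeightWidth splits
// the total padding between the leading and trailing edges and stores the
// result in data->padding.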

TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kPoolingInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_STATUS(
      CalculateOpDataPooling(context, params, input, output, data));

  if (input->type == kTfLiteFloat32) {
    CalculateActivationRange(params->activation, &data->activation_min_f32,
                             &data->activation_max_f32);
  } else if (input->type == kTfLiteInt8) {
    CalculateActivationRangeQuantized(context, params->activation, output,
                                      &data->activation_min,
                                      &data->activation_max);
  }

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);

  return kTfLiteOk;
}

void AveragePoolingEvalFloat(const TfLiteContext* context,
                             const TfLiteNode* node,
                             const TfLitePoolParams* params,
                             const OpDataPooling* data,
                             const TfLiteEvalTensor* input,
                             TfLiteEvalTensor* output) {
  PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.float_activation_min = data->activation_min_f32;
  op_params.float_activation_max = data->activation_max_f32;
  reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
                             tflite::micro::GetTensorData<float>(input),
                             tflite::micro::GetTensorShape(output),
                             tflite::micro::GetTensorData<float>(output));
}

void AveragePoolingEvalQuantized(TfLiteContext* context,
                                 const TfLiteNode* node,
                                 const TfLitePoolParams* params,
                                 const OpDataPooling* data,
                                 const TfLiteEvalTensor* input,
                                 TfLiteEvalTensor* output) {
  TFLITE_DCHECK(input->type == kTfLiteInt8);

  PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = data->activation_min;
  op_params.quantized_activation_max = data->activation_max;

  reference_integer_ops::AveragePool(
      op_params, tflite::micro::GetTensorShape(input),
      tflite::micro::GetTensorData<int8_t>(input),
      tflite::micro::GetTensorShape(output),
      tflite::micro::GetTensorData<int8_t>(output));
}

void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
                         TfLitePoolParams* params, const OpDataPooling* data,
                         const TfLiteEvalTensor* input,
                         TfLiteEvalTensor* output) {
  tflite::PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.float_activation_min = data->activation_min_f32;
  op_params.float_activation_max = data->activation_max_f32;
  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
                         tflite::micro::GetTensorData<float>(input),
                         tflite::micro::GetTensorShape(output),
                         tflite::micro::GetTensorData<float>(output));
}

void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
                             TfLitePoolParams* params,
                             const OpDataPooling* data,
                             const TfLiteEvalTensor* input,
                             TfLiteEvalTensor* output) {
  tflite::PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.filter_height = params->filter_height;
  op_params.filter_width = params->filter_width;
  op_params.padding_values.height = data->padding.height;
  op_params.padding_values.width = data->padding.width;
  op_params.quantized_activation_min = data->activation_min;
  op_params.quantized_activation_max = data->activation_max;

  reference_integer_ops::MaxPool(op_params,
                                 tflite::micro::GetTensorShape(input),
                                 tflite::micro::GetTensorData<int8_t>(input),
                                 tflite::micro::GetTensorShape(output),
                                 tflite::micro::GetTensorData<int8_t>(output));
}

}  // namespace tflite
@@ -1,39 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
                                  const TfLiteTensor* alpha,
                                  TfLiteTensor* output, PreluParams* params);

void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape,
                               const float* input1_data,
                               const RuntimeShape& unextended_input2_shape,
                               const float* input2_data,
                               const RuntimeShape& unextended_output_shape,
                               float* output_data);

TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_
@@ -1,105 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/prelu.h"

namespace tflite {

TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
                                  const TfLiteTensor* alpha,
                                  TfLiteTensor* output, PreluParams* params) {
  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
    double real_multiplier_1 = static_cast<double>(input->params.scale) /
                               static_cast<double>(output->params.scale);
    double real_multiplier_2 = static_cast<double>(input->params.scale) *
                               static_cast<double>(alpha->params.scale) /
                               static_cast<double>(output->params.scale);
    QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
                       &params->output_shift_1);
    QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
                       &params->output_shift_2);

    params->input_offset = -input->params.zero_point;
    params->alpha_offset = -alpha->params.zero_point;
    params->output_offset = output->params.zero_point;
  }

  return kTfLiteOk;
}
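
// Editor's note (why two multipliers are needed): PReLU computes
// x >= 0 ? x : alpha * x. The identity branch is rescaled by
// input_scale / output_scale (multiplier 1) and the alpha branch by
// input_scale * alpha_scale / output_scale (multiplier 2), matching the two
// products formed above.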

void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape,
                               const float* input1_data,
                               const RuntimeShape& unextended_input2_shape,
                               const float* input2_data,
                               const RuntimeShape& unextended_output_shape,
                               float* output_data) {
  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
  const RuntimeShape output_shape =
      RuntimeShape::ExtendedShape(4, unextended_output_shape);

  NdArrayDesc<4> desc1;
  NdArrayDesc<4> desc2;
  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
                                      unextended_input2_shape, &desc1, &desc2);

  for (int b = 0; b < output_shape.Dims(0); ++b) {
    for (int y = 0; y < output_shape.Dims(1); ++y) {
      for (int x = 0; x < output_shape.Dims(2); ++x) {
        for (int c = 0; c < output_shape.Dims(3); ++c) {
          auto out_idx = Offset(output_shape, b, y, x, c);
          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
          auto in1_val = input1_data[in1_idx];
          auto in2_val = input2_data[in2_idx];
          output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
        }
      }
    }
  }
}

TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  PreluParams* params = static_cast<PreluParams*>(node->user_data);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* alpha = micro_context->AllocateTempInputTensor(node, 1);
  TF_LITE_ENSURE(context, alpha != nullptr);
  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_OK(context,
                    CalculatePreluParams(input, alpha, output, params));

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(alpha);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

}  // namespace tflite
@@ -1,41 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/quantize.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace {

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context,
                                           sizeof(OpDataQuantizeReference));
}

}  // namespace

TfLiteRegistration Register_QUANTIZE() {
  return tflite::micro::RegisterOp(Init, PrepareQuantizeReference,
                                   EvalQuantizeReference);
}

}  // namespace tflite
@@ -1,37 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

struct OpDataQuantizeReference {
  tflite::QuantizationParams quantization_params;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t requantize_output_multiplier;
  int requantize_output_shift;

  int32_t input_zero_point;
};

TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus PrepareQuantizeReference(TfLiteContext* context, TfLiteNode* node);
}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
@@ -1,239 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <limits>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
                                      TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  // TODO(b/128934713): Add support for fixed-point per-channel quantization.
  // Currently this only supports affine per-layer quantization.
  TF_LITE_ENSURE_EQ(context, output->quantization.type,
                    kTfLiteAffineQuantization);
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
  TF_LITE_ENSURE(context, affine_quantization);
  TF_LITE_ENSURE(context, affine_quantization->scale);
  TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);

  TF_LITE_ENSURE(
      context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt32 ||
                   input->type == kTfLiteInt16 || input->type == kTfLiteInt8 ||
                   input->type == kTfLiteUInt8);
  TF_LITE_ENSURE(context, output->type == kTfLiteInt8 ||
                              output->type == kTfLiteInt16 ||
                              output->type == kTfLiteInt32 ||
                              output->type == kTfLiteUInt8);

  if ((input->type == kTfLiteInt16 && output->type == kTfLiteInt8) ||
      (input->type == kTfLiteInt8 && output->type == kTfLiteInt8) ||
      (input->type == kTfLiteInt8 && output->type == kTfLiteUInt8) ||
      (input->type == kTfLiteUInt8 && output->type == kTfLiteInt8) ||
      (input->type == kTfLiteInt8 && output->type == kTfLiteInt16) ||
      (input->type == kTfLiteInt8 && output->type == kTfLiteInt32) ||
      (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) ||
      (input->type == kTfLiteInt16 && output->type == kTfLiteInt32) ||
      (input->type == kTfLiteInt32 && output->type == kTfLiteInt8) ||
      (input->type == kTfLiteInt32 && output->type == kTfLiteInt16)) {
    double effective_scale = static_cast<double>(input->params.scale) /
                             static_cast<double>(output->params.scale);

    QuantizeMultiplier(effective_scale, &data->requantize_output_multiplier,
                       &data->requantize_output_shift);
  }

  data->quantization_params.zero_point = output->params.zero_point;
  data->quantization_params.scale = static_cast<double>(output->params.scale);

  data->input_zero_point = input->params.zero_point;

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}
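
// Worked example (editor addition; the scales are hypothetical):
// requantizing from input scale 0.02 to output scale 0.01 gives
// effective_scale == 2.0, i.e. one input step is worth two output steps;
// QuantizeMultiplier encodes this as a Q31 mantissa of 0.5 (1 << 30) with
// shift == 2, since 0.5 * 2^2 == 2.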

TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);

  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

  if (input->type == kTfLiteFloat32) {
    switch (output->type) {
      case kTfLiteInt8:
        reference_ops::AffineQuantize(
            data->quantization_params, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<float>(input),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
        break;
      case kTfLiteInt16:
        reference_ops::AffineQuantize(
            data->quantization_params, tflite::micro::GetTensorShape(input),
            tflite::micro::GetTensorData<float>(input),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int16_t>(output));
        return kTfLiteOk;
      default:
        MicroPrintf("Input %s, output %s not supported.",
                    TfLiteTypeGetName(input->type),
                    TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else if (input->type == kTfLiteInt32) {
    size_t size = ElementCount(*input->dims);
    switch (output->type) {
      case kTfLiteInt8:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int32_t>(input), size,
            data->requantize_output_multiplier, data->requantize_output_shift,
            data->input_zero_point, data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int8_t>(output));
        break;
      case kTfLiteInt16:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int32_t>(input), size,
            data->requantize_output_multiplier, data->requantize_output_shift,
            data->input_zero_point, data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int16_t>(output));
        break;
      default:
        MicroPrintf("Input %s, output %s not supported.",
                    TfLiteTypeGetName(input->type),
                    TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else if (input->type == kTfLiteInt16) {
    size_t size = ElementCount(*input->dims);
    switch (output->type) {
      case kTfLiteInt8:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int16_t>(input), size,
            data->requantize_output_multiplier, data->requantize_output_shift,
            data->input_zero_point, data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int8_t>(output));
        break;
      case kTfLiteInt16:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int16_t>(input), size,
            data->requantize_output_multiplier, data->requantize_output_shift,
            data->input_zero_point, data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int16_t>(output));
        return kTfLiteOk;
      case kTfLiteInt32:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int16_t>(input), size,
            data->requantize_output_multiplier, data->requantize_output_shift,
            data->input_zero_point, data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int32_t>(output));
        return kTfLiteOk;
      default:
        MicroPrintf("Input %s, output %s not supported.",
                    TfLiteTypeGetName(input->type),
                    TfLiteTypeGetName(output->type));
        return kTfLiteError;
    }
  } else if (input->type == kTfLiteInt8) {
    // Int8 to int8 requantization, required if the input and output tensors
    // have different scales and/or zero points.
    size_t size = ElementCount(*input->dims);
    switch (output->type) {
      case kTfLiteInt8:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int8_t>(input), size,
            data->requantize_output_multiplier, data->requantize_output_shift,
            data->input_zero_point, data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int8_t>(output));
        break;
      case kTfLiteUInt8:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int8_t>(input), size,
            data->requantize_output_multiplier, data->requantize_output_shift,
            data->input_zero_point, data->quantization_params.zero_point,
            tflite::micro::GetTensorData<uint8_t>(output));
        break;
      case kTfLiteInt16:
        reference_ops::Requantize(
            tflite::micro::GetTensorData<int8_t>(input), size,
            data->requantize_output_multiplier, data->requantize_output_shift,
            data->input_zero_point, data->quantization_params.zero_point,
            tflite::micro::GetTensorData<int16_t>(output));
||||
break;
|
||||
case kTfLiteInt32:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<int8_t>(input), size,
|
||||
data->requantize_output_multiplier, data->requantize_output_shift,
|
||||
data->input_zero_point, data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
break;
|
||||
default:
|
||||
MicroPrintf("Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else if (input->type == kTfLiteUInt8) {
|
||||
size_t size = ElementCount(*input->dims);
|
||||
switch (output->type) {
|
||||
case kTfLiteInt8:
|
||||
reference_ops::Requantize(
|
||||
tflite::micro::GetTensorData<uint8_t>(input), size,
|
||||
data->requantize_output_multiplier, data->requantize_output_shift,
|
||||
data->input_zero_point, data->quantization_params.zero_point,
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
break;
|
||||
default:
|
||||
MicroPrintf("Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
} else {
|
||||
MicroPrintf("Input %s, output %s not supported.",
|
||||
TfLiteTypeGetName(input->type),
|
||||
TfLiteTypeGetName(output->type));
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
} // namespace tflite
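// [Editor's sketch — not part of the original file.] How the Prepare()/Eval()
// pair above cooperates: Prepare() folds the input and output scales into one
// fixed-point multiplier, and Requantize() applies it per element. With the
// hypothetical scales 0.5 (input) and 0.25 (output):
//
//   int32_t multiplier;  // Q31 mantissa written by QuantizeMultiplier().
//   int shift;           // Base-2 exponent written by QuantizeMultiplier().
//   QuantizeMultiplier(0.5 / 0.25, &multiplier, &shift);  // effective_scale = 2
//   // Per element, Requantize() then computes (clamped to the output range):
//   //   y = MultiplyByQuantizedMultiplier(x - input_zero_point,
//   //                                     multiplier, shift) + output_zero_point;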
@@ -1,87 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <stddef.h>

#include <cstring>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_resource_variable.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace {

constexpr int kInputVariableId = 0;
constexpr int kOutputValue = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(NumInputs(node) == 1);
  TFLITE_DCHECK(NumOutputs(node) == 1);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input_resource_id_tensor =
      micro_context->AllocateTempInputTensor(node, kInputVariableId);

  TFLITE_DCHECK(input_resource_id_tensor != nullptr);
  TFLITE_DCHECK(input_resource_id_tensor->type == kTfLiteResource);
  TFLITE_DCHECK(NumElements(input_resource_id_tensor) == 1);

  micro_context->DeallocateTempTfLiteTensor(input_resource_id_tensor);

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input_resource_id_tensor =
      tflite::micro::GetEvalInput(context, node, kInputVariableId);
  TFLITE_DCHECK(input_resource_id_tensor != nullptr);

  TfLiteEvalTensor* output_value =
      tflite::micro::GetEvalOutput(context, node, kOutputValue);
  TFLITE_DCHECK(output_value != nullptr);

  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  MicroGraph& graph_info = micro_context->graph();

  MicroResourceVariables* resources = graph_info.GetResourceVariables();
  if (resources == nullptr) {
    MicroPrintf(
        "READ_VARIABLE requires resource variables. Please create "
        "ResourceVariables and pass it to the interpreter.");
    return kTfLiteError;
  }
  TF_LITE_ENSURE_OK(
      context,
      resources->Read(input_resource_id_tensor->data.i32[0], output_value));
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_READ_VARIABLE() {
  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
}

}  // namespace tflite
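// [Editor's sketch — not part of the original file.] READ_VARIABLE fails at
// Eval() unless the interpreter owns a MicroResourceVariables instance. A
// minimal setup sketch; the exact constructor overloads vary across TFLM
// versions, so treat the signatures below as assumptions:
//
//   tflite::MicroAllocator* allocator =
//       tflite::MicroAllocator::Create(tensor_arena, arena_size);
//   tflite::MicroResourceVariables* variables =
//       tflite::MicroResourceVariables::Create(allocator, /*num_variables=*/4);
//   tflite::MicroInterpreter interpreter(model, op_resolver, allocator,
//                                        variables);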
@@ -1,72 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/reduce.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/reduce.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
  return context->AllocatePersistentBuffer(context, sizeof(OpDataReduce));
}

TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
  return PrepareMaxHelper(context, node,
                          static_cast<OpDataReduce*>(node->user_data));
}

TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
  return PrepareMeanOrSumHelper(context, node,
                                static_cast<OpDataReduce*>(node->user_data));
}

TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
  return EvalMeanHelper(context, node,
                        static_cast<OpDataReduce*>(node->user_data));
}

TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) {
  OpDataReduce* op_data = static_cast<OpDataReduce*>(node->user_data);
  return EvalMaxHelper(context, node, op_data);
}

TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) {
  return EvalSumHelper(context, node,
                       static_cast<OpDataReduce*>(node->user_data));
}

TfLiteRegistration Register_MEAN() {
  return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalMean);
}

TfLiteRegistration Register_REDUCE_MAX() {
  return tflite::micro::RegisterOp(InitReduce, PrepareMax, EvalMax);
}

TfLiteRegistration Register_SUM() {
  return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalSum);
}

}  // namespace tflite
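// [Editor's sketch — not part of the original file.] These registrations are
// consumed through an op resolver. With MicroMutableOpResolver the three
// reduce kernels above are typically pulled in like this (method names per
// the resolver's Add* convention, assumed to match this TFLM snapshot):
//
//   tflite::MicroMutableOpResolver<3> resolver;
//   resolver.AddMean();       // -> Register_MEAN()
//   resolver.AddReduceMax();  // -> Register_REDUCE_MAX()
//   resolver.AddSum();        // -> Register_SUM()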
@@ -1,64 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

extern const int kMaxNumberOfAxis;
extern const int kMaxNumberOfReducedAxis;

struct OpDataReduce {
  int32_t multiplier;
  int shift;
  int temp_buffer_idx;
  int resolved_axis_idx;
  int input_zp;
  float input_scale;
  int output_zp;
  float output_scale;
  int num_output_elements;
};

TfLiteStatus PrepareMaxHelper(TfLiteContext* context, TfLiteNode* node,
                              OpDataReduce* op_data);

TfLiteStatus PrepareMeanOrSumHelper(TfLiteContext* context, TfLiteNode* node,
                                    OpDataReduce* op_data);

TfLiteStatus EvalMaxHelper(TfLiteContext* context, TfLiteNode* node,
                           OpDataReduce* op_data);
TfLiteStatus EvalMeanHelper(TfLiteContext* context, TfLiteNode* node,
                            OpDataReduce* op_data);
TfLiteStatus EvalSumHelper(TfLiteContext* context, TfLiteNode* node,
                           OpDataReduce* op_data);

void ReduceResolveAxis(const int* axis_data, int axis_count,
                       MeanParams* op_params);

TfLiteRegistration Register_MEAN();
TfLiteRegistration Register_REDUCE_MAX();
TfLiteRegistration Register_SUM();

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_
@@ -1,118 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstring>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace ops {
namespace micro {
namespace reshape {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  // TensorFlow's Reshape allows one of the shape components to have the
  // special -1 value, meaning it will be calculated automatically based on the
  // input. Here we calculate what that dimension should be so that the number
  // of output elements is the same as the number of input elements.
  int num_input_elements = NumElements(input);
  TfLiteIntArray* output_shape = output->dims;

  if (NumInputs(node) == 1 &&  // Legacy scalar supported with params.
      output_shape->size == 1 && output_shape->data[0] == 0) {
    // Legacy tflite models use a shape parameter of [0] to indicate scalars,
    // so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during
    // toco conversion.
    output_shape->size = 0;
  }

  int num_output_elements = 1;
  int stretch_dim = -1;
  for (int i = 0; i < output_shape->size; ++i) {
    int value = output_shape->data[i];
    if (value == -1) {
      TF_LITE_ENSURE_EQ(context, stretch_dim, -1);
      stretch_dim = i;
    } else {
      num_output_elements *= value;
    }
  }
  if (stretch_dim != -1) {
    output_shape->data[stretch_dim] = num_input_elements / num_output_elements;
    num_output_elements *= output_shape->data[stretch_dim];
  }

  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
  TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  // TODO(b/162522304): storing input bytes in OpData increases the size of
  // some models significantly, possibly due to alignment issues.
  size_t input_bytes;
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
  input_bytes *= ElementCount(*input->dims);

  // Do nothing for in-place reshape.
  if (input->data.raw != output->data.raw) {
    // Otherwise perform reshape with copy.
    memcpy(output->data.raw, input->data.raw, input_bytes);
  }
  return kTfLiteOk;
}

}  // namespace reshape

TfLiteRegistration Register_RESHAPE() {
  return tflite::micro::RegisterOp(nullptr, reshape::Prepare, reshape::Eval);
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
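// [Editor's sketch — not part of the original file.] The stretch-dimension
// loop above infers a single -1 entry from the element count. For example,
// reshaping a 2x3x4 input (24 elements) with requested shape {4, -1}:
//   num_output_elements = 4            (product of the non -1 entries)
//   output_shape[1]     = 24 / 4 = 6   (stretch dim resolved)
//   final check: 4 * 6 == 24 elements, so Prepare() returns kTfLiteOk.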
@@ -1,76 +0,0 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/round.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace round {

constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
  TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
  TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
  for (int i = 0; i < output->dims->size; ++i) {
    TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
  }

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  reference_ops::Round(tflite::micro::GetTensorShape(input),
                       tflite::micro::GetTensorData<float>(input),
                       tflite::micro::GetTensorShape(output),
                       tflite::micro::GetTensorData<float>(output));

  return kTfLiteOk;
}
}  // namespace round

TfLiteRegistration Register_ROUND() {
  return tflite::micro::RegisterOp(nullptr, round::Prepare, round::Eval);
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
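// [Editor's note — not part of the original file.] To my understanding,
// reference_ops::Round implements round-half-to-even (banker's rounding) to
// match TensorFlow's ROUND semantics, so under that assumption:
//   Round(2.5f) == 2.0f   while   Round(3.5f) == 4.0f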
@@ -1,69 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"

namespace tflite {

void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);

// Common helper function to SoftmaxPrepare.
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
                                    const TfLiteTensor* input,
                                    TfLiteTensor* output,
                                    const TfLiteSoftmaxParams* params,
                                    SoftmaxParams* op_data);

TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_SOFTMAX();

#if defined(XTENSA) || defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for a kernel variant that only supports
// int8 input and int16 output.
TfLiteRegistration Register_SOFTMAX_INT8_INT16();
#else
inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
  return Register_SOFTMAX();
}
#endif

#if defined(CMSIS_NN)
// Returns a TfLiteRegistration struct for a kernel variant that only supports
// int8 input/output and uses the latency-optimized implementations.
TfLiteRegistration Register_SOFTMAX_INT8();

// Returns a TfLiteRegistration struct for a kernel variant that only supports
// int16 input/output and uses the latency-optimized implementations.
TfLiteRegistration Register_SOFTMAX_INT16();

#else
inline TfLiteRegistration Register_SOFTMAX_INT8() { return Register_SOFTMAX(); }

inline TfLiteRegistration Register_SOFTMAX_INT16() {
  return Register_SOFTMAX();
}
#endif

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
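// [Editor's note — not part of the original file.] The #if blocks above mean
// callers can always use the specialized names; on targets without XTENSA or
// CMSIS_NN they simply alias the generic kernel:
//
//   TfLiteRegistration r = tflite::Register_SOFTMAX_INT8();
//   // On a plain reference build, r == Register_SOFTMAX().
//   // On a CMSIS_NN build, the same call returns the int8-only,
//   // latency-optimized variant instead.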
@@ -1,162 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_context.h"

namespace tflite {

namespace {
// Softmax parameter data that persists in user_data.
const int kInt16LUTArraySize = 513;

TfLiteStatus InitializeLutForInt16(TfLiteContext* context,
                                   const TfLiteTensor* input,
                                   TfLiteTensor* output,
                                   SoftmaxParams* op_data) {
  // Only allocate LUTs for the kTfLiteInt16 data type.
  if (input->type == kTfLiteInt16) {
    void* raw_exp_lut = context->AllocatePersistentBuffer(
        context, sizeof(int16_t) * kInt16LUTArraySize);
    TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
    op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
    void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
        context, sizeof(int16_t) * kInt16LUTArraySize);
    TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
    op_data->one_over_one_plus_x_lut =
        reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
  }

  if (output->type == kTfLiteInt16) {
    TF_LITE_ENSURE(context,
                   input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
  } else {
    TF_LITE_ENSURE_EQ(context, input->type, output->type);
  }

  // Populate the LUTs if required.
  if (input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
    // The exp LUT is only used on negative values; we consider exp(-10.0)
    // insignificant for the accumulation.
    gen_lut<float, int16_t, int16_t>(
        [](float value) { return std::exp(value); }, -10.0f, 0.0f, -1.0f, 1.0f,
        op_data->exp_lut);
    gen_lut<float, int16_t, int16_t>(
        [](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f, -1.0f,
        1.0f, op_data->one_over_one_plus_x_lut);
    op_data->zero_point = output->params.zero_point;
    op_data->scale = output->params.scale;
  }

  return kTfLiteOk;
}

}  // namespace

TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
                                    const TfLiteTensor* input,
                                    TfLiteTensor* output,
                                    const TfLiteSoftmaxParams* params,
                                    SoftmaxParams* op_data) {
  if (InitializeLutForInt16(context, input, output, op_data) != kTfLiteOk) {
    return kTfLiteError;
  }

  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
    if (input->type == kTfLiteInt16) {
      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
      TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
                          (0.001f * 1.f / 32768));
    } else {  // input->type == kTfLiteInt8
      TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
      if (output->type == kTfLiteInt16) {
        TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
        TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
                            (0.001f * 1.f / 65536));
      } else {  // output->type == kTfLiteInt8
        TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
        TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
        TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
      }
    }

    static const int kScaledDiffIntegerBits = 5;

    // Calculate input_multiplier and input_left_shift.
    if (input->type == kTfLiteInt16) {
      int input_left_shift;
      double input_scale_beta_rescale =
          static_cast<double>(input->params.scale) *
          static_cast<double>(params->beta) /
          (10.0 / 65535.0);  // scale the input_diff such that [-65535, 0]
                             // corresponds to [-10.0, 0.0]
      QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
                         &input_left_shift);
      op_data->input_left_shift = input_left_shift;
    } else {
      int input_left_shift;
      tflite::PreprocessSoftmaxScaling(
          static_cast<double>(params->beta),
          static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
          &op_data->input_multiplier, &input_left_shift);
      op_data->input_left_shift = input_left_shift;
      op_data->diff_min =
          -1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
                                              op_data->input_left_shift);
    }
  } else {
    TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
    op_data->beta = static_cast<double>(params->beta);
  }
  return kTfLiteOk;
}

void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
}

TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
  TF_LITE_ENSURE(context, input != nullptr);
  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE(context, node->user_data != nullptr);
  SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);

  auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
  auto ret_val =
      CalculateSoftmaxParams(context, input, output, params, op_data);

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(output);
  return ret_val;
}

}  // namespace tflite
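// [Editor's sketch — not part of the original file.] For the int8 path above,
// PreprocessSoftmaxScaling() turns beta * input_scale into a fixed-point
// multiplier bounded by kScaledDiffIntegerBits. A standalone call, assuming
// beta = 1.0 and a hypothetical input scale of 0.1:
//
//   int32_t input_multiplier;
//   int input_left_shift;
//   tflite::PreprocessSoftmaxScaling(1.0, 0.1, /*input_integer_bits=*/5,
//                                    &input_multiplier, &input_left_shift);
//   int diff_min = -tflite::CalculateInputRadius(5, input_left_shift);
//   // diff_min bounds the largest (x - max(x)) value that still contributes
//   // to the softmax sum in the quantized domain.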
@@ -1,128 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {
namespace ops {
namespace micro {
namespace split {

template <typename T>
TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
                       const TfLiteEvalTensor* input, int axis_value) {
  const int output_count = NumOutputs(node);
  const TfLiteIntArray* input_dims = input->dims;
  const TfLiteEvalTensor* output0 =
      tflite::micro::GetEvalOutput(context, node, 0);
  const TfLiteIntArray* output_dims = output0->dims;

  const int split_dimensions = input_dims->size;
  int axis = axis_value < 0 ? axis_value + split_dimensions : axis_value;

  TFLITE_DCHECK_LT(axis, split_dimensions);
  TFLITE_DCHECK_EQ(output_dims->size, split_dimensions);

  int64_t split_size = output_dims->data[axis] * output_count;

  TFLITE_DCHECK_EQ(split_size, input_dims->data[axis]);
  int64_t outer_size = 1;
  for (int i = 0; i < axis; ++i) {
    outer_size *= input_dims->data[i];
  }

  int64_t base_inner_size = 1;
  for (int i = axis + 1; i < split_dimensions; ++i) {
    base_inner_size *= input_dims->data[i];
  }

  const T* input_ptr = tflite::micro::GetTensorData<T>(input);
  for (int k = 0; k < outer_size; ++k) {
    for (int i = 0; i < output_count; ++i) {
      TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i);
      T* output_data = tflite::micro::GetTensorData<T>(t);
      const int copy_size = output_dims->data[axis] * base_inner_size;
      T* output_ptr = output_data + k * copy_size;
      for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
      input_ptr += copy_size;
    }
  }

  return kTfLiteOk;
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  MicroContext* micro_context = GetMicroContext(context);
  TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 0);
  TF_LITE_ENSURE(context, axis != nullptr);

  // Dynamic output tensors would be needed if the axis tensor were not
  // constant, but Micro doesn't support dynamic memory allocation, so we only
  // support constant axis tensors for now.
  TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
                     "Non constant axis tensor not supported");

  micro_context->DeallocateTempTfLiteTensor(axis);
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0);
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1);

  int axis_value = tflite::micro::GetTensorData<int32_t>(axis)[0];
  if (axis_value < 0) {
    axis_value += input->dims->size;
  }

  TF_LITE_ENSURE(context, axis_value >= 0);
  TF_LITE_ENSURE(context, axis_value < input->dims->size);

  switch (input->type) {
    case kTfLiteFloat32: {
      return SplitImpl<float>(context, node, input, axis_value);
    }
    case kTfLiteInt8: {
      return SplitImpl<int8_t>(context, node, input, axis_value);
    }
    case kTfLiteInt16: {
      return SplitImpl<int16_t>(context, node, input, axis_value);
    }
    case kTfLiteInt32: {
      return SplitImpl<int32_t>(context, node, input, axis_value);
    }
    default:
      MicroPrintf("Type %s currently not supported.",
                  TfLiteTypeGetName(input->type));
      return kTfLiteError;
  }

  return kTfLiteOk;
}

}  // namespace split

TfLiteRegistration Register_SPLIT() {
  return tflite::micro::RegisterOp(nullptr, split::Prepare, split::Eval);
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
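// [Editor's sketch — not part of the original file.] SplitImpl's copy layout
// for a [2, 4] float input split into two outputs along axis 1:
//   outer_size      = 2   (product of dims before the axis)
//   base_inner_size = 1   (product of dims after the axis)
//   copy_size       = 2   (output_dims[axis] * base_inner_size)
// Each outer row contributes copy_size elements to output 0, then copy_size
// elements to output 1, yielding two [2, 2] tensors.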
@@ -1,247 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {
namespace {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;

struct OpData {
  bool requires_broadcast;
  ArithmeticParams arithmetic_params;
};

template <typename T>
T SquaredDifference(T input1, T input2) {
  const T difference = input1 - input2;
  return difference * difference;
}

void* SquaredDifferenceInit(TfLiteContext* context, const char* buffer,
                            size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context,
                                      TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);
  data->requires_broadcast = false;

  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input1 =
      micro_context->AllocateTempInputTensor(node, kInputTensor1);
  TF_LITE_ENSURE(context, input1 != nullptr);
  TfLiteTensor* input2 =
      micro_context->AllocateTempInputTensor(node, kInputTensor2);
  TF_LITE_ENSURE(context, input2 != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
  output->type = input2->type;

  // Ensure the quantization parameters are equivalent.
  if (input1->type == kTfLiteInt8) {
    const auto& input1_quantization_params = input1->params;
    const auto& input2_quantization_params = input2->params;
    const auto& output_quantization_params = output->params;
    const int32_t integer_type_min = std::numeric_limits<int8_t>::min();
    const int32_t integer_type_max = std::numeric_limits<int8_t>::max();
    TF_LITE_ENSURE(context,
                   input1_quantization_params.zero_point >= integer_type_min);
    TF_LITE_ENSURE(context,
                   input1_quantization_params.zero_point <= integer_type_max);
    TF_LITE_ENSURE(context,
                   input2_quantization_params.zero_point >= integer_type_min);
    TF_LITE_ENSURE(context,
                   input2_quantization_params.zero_point <= integer_type_max);
    TF_LITE_ENSURE(context,
                   output_quantization_params.zero_point >= integer_type_min);
    TF_LITE_ENSURE(context,
                   output_quantization_params.zero_point <= integer_type_max);
    data->arithmetic_params.input1_offset =
        -input1_quantization_params.zero_point;
    data->arithmetic_params.input2_offset =
        -input2_quantization_params.zero_point;
    data->arithmetic_params.output_offset =
        output_quantization_params.zero_point;

    // Left shift applied so the scale computation stays in integer arithmetic.
    // 7 is selected so that the maximum shifted result 255^2 * (1 << (7 * 2))
    // does not overflow a signed 32-bit integer.
    data->arithmetic_params.left_shift = 7;
    const double twice_max_input_scale =
        2.0 * static_cast<double>(std::max(input1_quantization_params.scale,
                                           input2_quantization_params.scale));
    const double real_input1_multiplier =
        static_cast<double>(input1_quantization_params.scale) /
        twice_max_input_scale;
    double real_input2_multiplier =
        static_cast<double>(input2_quantization_params.scale) /
        twice_max_input_scale;
    const double real_output_multiplier =
        (twice_max_input_scale * twice_max_input_scale) /
        static_cast<double>((1 << data->arithmetic_params.left_shift * 2) *
                            output_quantization_params.scale);
    QuantizeMultiplierSmallerThanOneExp(
        real_input1_multiplier, &data->arithmetic_params.input1_multiplier,
        &data->arithmetic_params.input1_shift);
    QuantizeMultiplierSmallerThanOneExp(
        real_input2_multiplier, &data->arithmetic_params.input2_multiplier,
        &data->arithmetic_params.input2_shift);
    QuantizeMultiplierSmallerThanOneExp(
        real_output_multiplier, &data->arithmetic_params.output_multiplier,
        &data->arithmetic_params.output_shift);
    data->arithmetic_params.quantized_activation_min =
        std::numeric_limits<int8_t>::min();
    data->arithmetic_params.quantized_activation_max =
        std::numeric_limits<int8_t>::max();
  }

  data->requires_broadcast = !HaveSameShapes(input1, input2);

  micro_context->DeallocateTempTfLiteTensor(input1);
  micro_context->DeallocateTempTfLiteTensor(input2);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

inline int8_t SquaredDifference(int8_t x, int8_t y,
                                const ArithmeticParams& params) {
  const int32_t input1_val = params.input1_offset + x;
  const int32_t input2_val = params.input2_offset + y;
  const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
  const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
  const int32_t scaled_input1_val =
      MultiplyByQuantizedMultiplierSmallerThanOneExp(
          shifted_input1_val, params.input1_multiplier, params.input1_shift);
  const int32_t scaled_input2_val =
      MultiplyByQuantizedMultiplierSmallerThanOneExp(
          shifted_input2_val, params.input2_multiplier, params.input2_shift);
  const int32_t raw_diff = scaled_input1_val - scaled_input2_val;

  // The maximum of this is 255^2 * (1 << 14), so it won't overflow 32 bits.
  const int32_t squared_raw_diff = raw_diff * raw_diff;
  const int32_t raw_output =
      MultiplyByQuantizedMultiplierSmallerThanOneExp(
          squared_raw_diff, params.output_multiplier, params.output_shift) +
      params.output_offset;
  const int32_t clamped_output =
      std::min(params.quantized_activation_max,
               std::max(params.quantized_activation_min, raw_output));
  return static_cast<int8_t>(clamped_output);
}

template <typename T>
void EvalQuantizedSquaredDifference(TfLiteContext* context, TfLiteNode* node,
                                    const OpData* data,
                                    const TfLiteEvalTensor* input1,
                                    const TfLiteEvalTensor* input2,
                                    TfLiteEvalTensor* output) {
  const auto* op_data = static_cast<const OpData*>(node->user_data);
  if (data->requires_broadcast) {
    reference_integer_ops::BroadcastBinaryFunction4DSlow(
        op_data->arithmetic_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<T>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<T>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<T>(output),
        reference_integer_ops::CheckArithmeticParams, SquaredDifference);
  } else {
    const int flat_size = tflite::micro::GetTensorShape(input1).FlatSize();
    reference_integer_ops::ElementWise(
        flat_size, op_data->arithmetic_params,
        tflite::micro::GetTensorData<int8_t>(input1),
        tflite::micro::GetTensorData<int8_t>(input2),
        tflite::micro::GetTensorData<int8_t>(output),
        reference_integer_ops::CheckArithmeticParams, SquaredDifference);
  }
}

template <typename T>
void EvalSquaredDifference(TfLiteContext* context, TfLiteNode* node,
                           const OpData* data, const TfLiteEvalTensor* input1,
                           const TfLiteEvalTensor* input2,
                           TfLiteEvalTensor* output) {
  if (data->requires_broadcast) {
    reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<T>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<T>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<T>(output), SquaredDifference<T>);
  } else {
    reference_ops::BinaryFunction<T, T, T>(
        tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<T>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<T>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<T>(output), SquaredDifference<T>);
  }
}

TfLiteStatus SquaredDifferenceEval(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  if (output->type == kTfLiteFloat32) {
    EvalSquaredDifference<float>(context, node, data, input1, input2, output);
  } else if (output->type == kTfLiteInt32) {
    EvalSquaredDifference<int32_t>(context, node, data, input1, input2, output);
  } else if (output->type == kTfLiteInt8) {
    EvalQuantizedSquaredDifference<int8_t>(context, node, data, input1, input2,
                                           output);
  } else {
    MicroPrintf(
        "SquaredDifference only supports FLOAT32, INT32 and INT8 now, got %d.",
        output->type);
    return kTfLiteError;
  }

  return kTfLiteOk;
}
}  // namespace

TfLiteRegistration Register_SQUARED_DIFFERENCE() {
  return tflite::micro::RegisterOp(
      SquaredDifferenceInit, SquaredDifferencePrepare, SquaredDifferenceEval);
}

}  // namespace tflite
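// [Editor's note — not part of the original file.] Why left_shift = 7 is safe
// in the int8 path above: after rescaling, |raw_diff| is bounded by
// 255 << 7, so
//   squared_raw_diff <= 255^2 * (1 << 14) = 65025 * 16384 = 1,065,369,600
// which stays below INT32_MAX (2,147,483,647), so the 32-bit intermediate
// math cannot overflow.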
@@ -1,168 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/sub.h"

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

namespace tflite {

void* SubInit(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataSub));
}

void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
             const OpDataSub* data, const TfLiteEvalTensor* input1,
             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  tflite::ArithmeticParams op_params;
  SetActivationParams(output_activation_min, output_activation_max, &op_params);
  if (data->requires_broadcast) {
    tflite::reference_ops::BroadcastSubSlow(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
  } else {
    tflite::reference_ops::SubWithActivation(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
  }
}

TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLiteSubParams* params, const OpDataSub* data,
                              const TfLiteEvalTensor* input1,
                              const TfLiteEvalTensor* input2,
                              TfLiteEvalTensor* output) {
  tflite::ArithmeticParams op_params;
  op_params.left_shift = data->left_shift;
  op_params.input1_offset = data->input1_offset;
  op_params.input1_multiplier = data->input1_multiplier;
  op_params.input1_shift = data->input1_shift;
  op_params.input2_offset = data->input2_offset;
  op_params.input2_multiplier = data->input2_multiplier;
  op_params.input2_shift = data->input2_shift;
  op_params.output_offset = data->output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = data->output_shift;
  SetActivationParams(data->output_activation_min, data->output_activation_max,
                      &op_params);
  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
      tflite::micro::GetTensorShape(input1),
      tflite::micro::GetTensorShape(input2), &op_params);

  switch (output->type) {
    case kTfLiteInt8: {
      if (need_broadcast) {
        tflite::reference_ops::BroadcastQuantSubSlow(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      } else {
        tflite::reference_ops::Sub(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int8_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int8_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int8_t>(output));
      }
      break;
    }
    case kTfLiteInt16: {
      if (need_broadcast) {
        tflite::reference_ops::BroadcastQuantSubSlow(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int16_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int16_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int16_t>(output));
      } else {
        tflite::reference_ops::Sub(
            op_params, tflite::micro::GetTensorShape(input1),
            tflite::micro::GetTensorData<int16_t>(input1),
            tflite::micro::GetTensorShape(input2),
            tflite::micro::GetTensorData<int16_t>(input2),
            tflite::micro::GetTensorShape(output),
            tflite::micro::GetTensorData<int16_t>(output));
      }
      break;
    }
    default:
      MicroPrintf("Quantized type %s not currently supported.",
                  TfLiteTypeGetName(output->type));
      return kTfLiteError;
  }
  return kTfLiteOk;
}

TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);

  const TfLiteEvalTensor* input1 =
      tflite::micro::GetEvalInput(context, node, kSubInputTensor1);
  const TfLiteEvalTensor* input2 =
      tflite::micro::GetEvalInput(context, node, kSubInputTensor2);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kSubOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpDataSub& data = *(static_cast<const OpDataSub*>(node->user_data));

  if (output->type == kTfLiteFloat32) {
    EvalSub(context, node, params, &data, input1, input2, output);
  } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
    TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
                                                input1, input2, output));
  } else {
    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
                output->type);
    return kTfLiteError;
  }

  return kTfLiteOk;
}

TfLiteRegistration Register_SUB() {
  return tflite::micro::RegisterOp(SubInit, SubPrepare, SubEval);
}

}  // namespace tflite
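// [Editor's sketch — not part of the original file.] SubEval dispatches on
// the output type, so a model mixing float and quantized SUB nodes reuses a
// single registration (resolver method name assumed per the Add* convention):
//
//   tflite::MicroMutableOpResolver<1> resolver;
//   resolver.AddSub();  // -> Register_SUB(), covers float32, int8 and int16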
@@ -1,60 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"

namespace tflite {

extern const int kSubInputTensor1;
extern const int kSubInputTensor2;
extern const int kSubOutputTensor;

struct OpDataSub {
  bool requires_broadcast;

  // These fields are used in both the general 8-bit -> 8-bit quantized path
  // and the special 16-bit -> 16-bit quantized path.
  int input1_shift;
  int input2_shift;
  int32_t output_activation_min;
  int32_t output_activation_max;

  // These fields are used only in the general 8-bit -> 8-bit quantized path.
  int32_t input1_multiplier;
  int32_t input2_multiplier;
  int32_t output_multiplier;
  int output_shift;
  int left_shift;
  int32_t input1_offset;
  int32_t input2_offset;
  int32_t output_offset;
};

TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params,
                                const TfLiteTensor* input1,
                                const TfLiteTensor* input2,
                                TfLiteTensor* output, OpDataSub* data);

TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_
|
||||
@@ -1,107 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/reference/sub.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/sub.h"

namespace tflite {

const int kSubInputTensor1 = 0;
const int kSubInputTensor2 = 1;
const int kSubOutputTensor = 0;

TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params,
                                const TfLiteTensor* input1,
                                const TfLiteTensor* input2,
                                TfLiteTensor* output, OpDataSub* data) {
  data->requires_broadcast = !HaveSameShapes(input1, input2);

  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
    // 8bit -> 8bit general quantized path, with general rescalings
    data->input1_offset = -input1->params.zero_point;
    data->input2_offset = -input2->params.zero_point;
    data->output_offset = output->params.zero_point;

    // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit,
    // accordingly. In case of 16-bit we have 65535 << 15 which is less than 1
    // << 31, therefore the addition will still fit in a 32 bit accumulator.
    data->left_shift = output->type == kTfLiteInt16 ? 15 : 20;
    const float twice_max_input_scale =
        2 * std::max(input1->params.scale, input2->params.scale);
    const double real_input1_multiplier =
        static_cast<double>(input1->params.scale / twice_max_input_scale);
    const double real_input2_multiplier =
        static_cast<double>(input2->params.scale / twice_max_input_scale);
    const double real_output_multiplier =
        static_cast<double>(twice_max_input_scale /
                            ((1 << data->left_shift) * output->params.scale));

    QuantizeMultiplierSmallerThanOneExp(
        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);

    QuantizeMultiplierSmallerThanOneExp(
        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);

    QuantizeMultiplierSmallerThanOneExp(
        real_output_multiplier, &data->output_multiplier, &data->output_shift);

    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, params->activation, output, &data->output_activation_min,
        &data->output_activation_max));
  }

  return kTfLiteOk;
}

TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->user_data != nullptr);
  TFLITE_DCHECK(node->builtin_data != nullptr);

  OpDataSub* data = static_cast<OpDataSub*>(node->user_data);
  auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);

  MicroContext* micro_context = GetMicroContext(context);

  TfLiteTensor* input1 =
      micro_context->AllocateTempInputTensor(node, kSubInputTensor1);
  TF_LITE_ENSURE(context, input1 != nullptr);
  TfLiteTensor* input2 =
      micro_context->AllocateTempInputTensor(node, kSubInputTensor2);
  TF_LITE_ENSURE(context, input2 != nullptr);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kSubOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);

  TF_LITE_ENSURE_STATUS(
      CalculateOpDataSub(context, params, input1, input2, output, data));

  micro_context->DeallocateTempTfLiteTensor(input1);
  micro_context->DeallocateTempTfLiteTensor(input2);
  micro_context->DeallocateTempTfLiteTensor(output);
  return kTfLiteOk;
}

}  // namespace tflite
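A note on the rescaling that CalculateOpDataSub sets up: both inputs are brought onto the common scale 2 * max(s1, s2), shifted up by left_shift bits for precision, subtracted, and then rescaled into output units. The standalone sketch below checks that arithmetic with invented example scales and values (all names and numbers are illustrative, not part of the kernel):

// Illustrative sketch only: recomputes the real-valued multipliers that
// CalculateOpDataSub derives, using made-up example scales, and checks that
// (q1 << shift) * m1 - (q2 << shift) * m2, rescaled by m_out, reproduces the
// real difference in output units.
#include <algorithm>
#include <cstdio>

int main() {
  const float input1_scale = 0.5f;   // example quantization scales (invented)
  const float input2_scale = 0.25f;
  const float output_scale = 1.0f;
  const int left_shift = 20;         // the 8-bit value chosen by the kernel

  const float twice_max_input_scale =
      2 * std::max(input1_scale, input2_scale);
  const double m1 = input1_scale / twice_max_input_scale;          // 0.5
  const double m2 = input2_scale / twice_max_input_scale;          // 0.25
  const double m_out =
      twice_max_input_scale / ((1 << left_shift) * output_scale);

  // Two example quantized values (zero points assumed 0 for brevity).
  const int q1 = 100, q2 = 60;
  const double real_diff = q1 * input1_scale - q2 * input2_scale;  // 35.0

  // Emulate the kernel: shift up, apply per-input multipliers, subtract,
  // then rescale into output units.
  const double acc = (q1 << left_shift) * m1 - (q2 << left_shift) * m2;
  const double out = acc * m_out * output_scale;

  std::printf("real=%f kernel=%f\n", real_diff, out);  // both print 35.0
  return 0;
}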
@@ -1,106 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/kernels/svdf.h"

#include <math.h>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {
namespace {

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpDataSvdf));
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
  TFLITE_DCHECK(node->user_data != nullptr);
  const OpDataSvdf& data = *(static_cast<const OpDataSvdf*>(node->user_data));

  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kSvdfInputTensor);
  const TfLiteEvalTensor* weights_feature =
      tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor);
  const TfLiteEvalTensor* weights_time =
      tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 5)
          ? tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor)
          : nullptr;
  TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
      context, node, kSvdfInputActivationStateTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor);

  switch (weights_feature->type) {
    case kTfLiteFloat32: {
      EvalFloatSvdfReference(
          context, node, input, weights_feature, weights_time, bias, params,
          data.scratch_tensor_index, activation_state, output);
      return kTfLiteOk;
      break;
    }

    case kTfLiteInt8: {
      switch (weights_time->type) {
        case kTfLiteInt16: {
          EvalInt16SvdfReference(context, node, input, weights_feature,
                                 weights_time, bias, params, activation_state,
                                 output, data);
          return kTfLiteOk;
          break;
        }
        case kTfLiteInt8: {
          EvalInt8SvdfReference(context, node, input, weights_feature,
                                weights_time, bias, params, activation_state,
                                output, data);
          return kTfLiteOk;
          break;
        }
        default:
          MicroPrintf("Type %s not currently supported.",
                      TfLiteTypeGetName(weights_time->type));
          return kTfLiteError;
      }
    }

    default:
      MicroPrintf("Type %s not currently supported.",
                  TfLiteTypeGetName(weights_feature->type));
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace

TfLiteRegistration Register_SVDF() {
  return tflite::micro::RegisterOp(Init, PrepareSvdf, Eval);
}

}  // namespace tflite
@@ -1,514 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <math.h>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/svdf.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

/**
 * This version of SVDF is specific to TFLite Micro. It contains the following
 * differences from the TFLite version:
 *
 * 1.) Scratch tensor allocation - scratch tensors must be known ahead of time
 * for the Micro interpreter.
 * 2.) Output dimensions - the TFLite version determines the output size at
 * runtime and resizes the output tensor. Micro runtime does not support tensor
 * resizing.
 */

const int kSvdfInputTensor = 0;
const int kSvdfWeightsFeatureTensor = 1;
const int kSvdfWeightsTimeTensor = 2;
const int kSvdfBiasTensor = 3;
const int kSvdfInputActivationStateTensor =
    4;  // This is a variable tensor, and will be modified by this op.
const int kSvdfOutputTensor = 0;

template <typename T>
void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
                              const TfLiteEvalTensor* input_tensor,
                              const TfLiteEvalTensor* weights_feature_tensor,
                              const TfLiteEvalTensor* weights_time_tensor,
                              const TfLiteEvalTensor* bias_tensor,
                              const TfLiteSVDFParams* params,
                              TfLiteEvalTensor* activation_state_tensor,
                              TfLiteEvalTensor* output_tensor,
                              const OpDataSvdf& data) {
  const int n_rank = params->rank;
  const int n_batch = input_tensor->dims->data[0];
  const int n_input = input_tensor->dims->data[1];
  const int n_filter = weights_feature_tensor->dims->data[0];
  const int n_unit = n_filter / n_rank;
  const int n_memory = weights_time_tensor->dims->data[1];

  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(context->GetScratchBuffer != nullptr);

  int32_t* scratch_tensor = static_cast<int32_t*>(
      context->GetScratchBuffer(context, data.scratch_tensor_index));
  int32_t* scratch_output_tensor = static_cast<int32_t*>(
      context->GetScratchBuffer(context, data.scratch_output_tensor_index));

  // Shift states.
  T* const state_ptr = tflite::micro::GetTensorData<T>(activation_state_tensor);

  // Left shift the activation_state.
  {
    T* new_state_start = state_ptr;
    const T* old_state_start = state_ptr + 1;
    const T* old_state_end = state_ptr + n_batch * n_filter * n_memory;
    while (old_state_start != old_state_end) {
      *new_state_start++ = *old_state_start++;
    }
  }

  // Note: no need to clear the latest activation, matmul is not accumulative.

  // Feature matmul.
  {
    T* state = tflite::micro::GetTensorData<T>(activation_state_tensor);
    const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
    const int8_t* weight_feature =
        tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
    const int32_t output_max = std::numeric_limits<T>::max();
    const int32_t output_min = std::numeric_limits<T>::min();
    T* result_in_batch = state + (n_memory - 1);
    for (int b = 0; b < n_batch; b++) {
      const int8_t* matrix_ptr = weight_feature;
      for (int r = 0; r < n_filter; r++) {
        int32_t dot_prod = 0;
        const int8_t* vector_in_batch = input + b * n_input;
        for (int c = 0; c < n_input; c++) {
          dot_prod +=
              *matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
        }
        dot_prod = MultiplyByQuantizedMultiplier(
            dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
        dot_prod = std::min(std::max(output_min, dot_prod), output_max);
        // The int16 version of the op assumes a zero_point of 0. This
        // code accounts for the potentially non-zero zero_point for the int8
        // version of the op.
        *result_in_batch = data.activation_state_zero_point + dot_prod;
        result_in_batch += n_memory;
      }
    }
  }

  // Time.
  {
    for (int b = 0; b < n_batch; ++b) {
      int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;

      // Perform batched vector dot product:
      const T* vector1_ptr =
          tflite::micro::GetTensorData<T>(weights_time_tensor);
      const T* vector2_ptr =
          tflite::micro::GetTensorData<T>(activation_state_tensor) +
          b * n_memory * n_filter;

      for (int i = 0; i < n_filter; i++) {
        *scratch_ptr_batch = 0;
        for (int j = 0; j < n_memory; j++) {
          *scratch_ptr_batch +=
              *vector1_ptr++ *
              (*vector2_ptr++ - data.activation_state_zero_point);
        }
        scratch_ptr_batch++;
      }
    }
  }

  // Reduce, add bias, rescale, activation.
  {
    // Add bias.
    if (bias_tensor) {
      // Vector batch assign:
      const int32_t* bias_data =
          tflite::micro::GetTensorData<int32_t>(bias_tensor);
      for (int i = 0; i < n_batch; ++i) {
        int32_t* output_ptr = scratch_output_tensor + i * n_unit;
        const int32_t* bias_ptr = bias_data;
        for (int j = 0; j < n_unit; ++j) {
          *output_ptr++ = *bias_ptr++;
        }
      }
    } else {
      int32_t* output_ptr = scratch_output_tensor;
      for (int i = 0; i < n_batch * n_unit; ++i) {
        *output_ptr++ = 0;
      }
    }

    // Reduce.
    for (int b = 0; b < n_batch; ++b) {
      int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
      int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;

      // Reduction sum vector
      for (int i = 0; i < n_unit; ++i) {
        for (int j = 0; j < n_rank; ++j) {
          output_temp_ptr[i] += *scratch_ptr_batch++;
        }
      }
    }

    // Rescale.
    const int32_t output_max = std::numeric_limits<int8_t>::max();
    const int32_t output_min = std::numeric_limits<int8_t>::min();
    for (int i = 0; i < n_batch * n_unit; ++i) {
      int32_t x1 = scratch_output_tensor[i];
      int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
                                                 data.effective_scale_2_b);
      int32_t x3 = x2 + data.output_zero_point;
      int32_t x4 = std::min(std::max(output_min, x3), output_max);
      tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
          static_cast<int8_t>(x4);
    }
  }
}

/**
 * Generate two versions of the integer code. One with int16_t type for the
 * time weights and the activation state, and another one with int8_t for the
 * same.
 */

void EvalInt16SvdfReference(TfLiteContext* context, TfLiteNode* node,
                            const TfLiteEvalTensor* input_tensor,
                            const TfLiteEvalTensor* weights_feature_tensor,
                            const TfLiteEvalTensor* weights_time_tensor,
                            const TfLiteEvalTensor* bias_tensor,
                            const TfLiteSVDFParams* params,
                            TfLiteEvalTensor* activation_state_tensor,
                            TfLiteEvalTensor* output_tensor,
                            const OpDataSvdf& data) {
  EvalIntegerSvdfReference<int16_t>(
      context, node, input_tensor, weights_feature_tensor, weights_time_tensor,
      bias_tensor, params, activation_state_tensor, output_tensor, data);
}

void EvalInt8SvdfReference(TfLiteContext* context, TfLiteNode* node,
                           const TfLiteEvalTensor* input_tensor,
                           const TfLiteEvalTensor* weights_feature_tensor,
                           const TfLiteEvalTensor* weights_time_tensor,
                           const TfLiteEvalTensor* bias_tensor,
                           const TfLiteSVDFParams* params,
                           TfLiteEvalTensor* activation_state_tensor,
                           TfLiteEvalTensor* output_tensor,
                           const OpDataSvdf& data) {
  EvalIntegerSvdfReference<int8_t>(
      context, node, input_tensor, weights_feature_tensor, weights_time_tensor,
      bias_tensor, params, activation_state_tensor, output_tensor, data);
}

static inline void ApplyTimeWeightsBiasAndActivation(
    int batch_size, int memory_size, int num_filters, int num_units, int rank,
    const float* const weights_time_ptr, const float* const bias_ptr,
    TfLiteFusedActivation activation, float* const state_ptr,
    float* const scratch_ptr, float* const output_ptr) {
  // Compute matmul(activation_state, weights_time).
  for (int b = 0; b < batch_size; ++b) {
    // Perform batched vector dot product:
    float* scratch_ptr_batch = scratch_ptr + b * num_filters;
    const float* vector1_ptr = weights_time_ptr;
    const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
    for (int i = 0; i < num_filters; ++i) {
      *scratch_ptr_batch = 0.f;
      for (int j = 0; j < memory_size; ++j) {
        *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
      }
      scratch_ptr_batch++;
    }
  }

  // Initialize output with bias if provided.
  if (bias_ptr) {
    // VectorBatchVectorAssign
    for (int i = 0; i < batch_size; ++i) {
      float* output_data = output_ptr + i * num_units;
      const float* bias_data = bias_ptr;
      for (int j = 0; j < num_units; ++j) {
        *output_data++ = *bias_data++;
      }
    }
  } else {
    float* output_data = output_ptr;
    for (int i = 0; i < batch_size * num_units; ++i) {
      *output_data++ = 0.0f;
    }
  }

  // Reduction sum.
  for (int b = 0; b < batch_size; ++b) {
    float* output_ptr_batch = output_ptr + b * num_units;
    float* scratch_ptr_batch = scratch_ptr + b * num_filters;

    // Reduction sum vector
    for (int i = 0; i < num_units; ++i) {
      for (int j = 0; j < rank; j++) {
        output_ptr_batch[i] += *scratch_ptr_batch++;
      }
    }
  }

  // Apply activation.
  for (int b = 0; b < batch_size; ++b) {
    float* output_ptr_batch = output_ptr + b * num_units;
    for (int i = 0; i < num_units; ++i) {
      *output_ptr_batch =
          tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch);
      ++output_ptr_batch;
    }
  }
}

void EvalFloatSvdfReference(
    TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* weights_feature,
    const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
    const TfLiteSVDFParams* params, int scratch_tensor_index,
    TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
  const int rank = params->rank;
  const int batch_size = input->dims->data[0];
  const int input_size = input->dims->data[1];
  const int num_filters = weights_feature->dims->data[0];
  const int num_units = num_filters / rank;
  const int memory_size = weights_time->dims->data[1];

  const float* weights_feature_ptr =
      tflite::micro::GetTensorData<float>(weights_feature);
  const float* weights_time_ptr =
      tflite::micro::GetTensorData<float>(weights_time);
  const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
  const float* input_ptr = tflite::micro::GetTensorData<float>(input);

  float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);

  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(context->GetScratchBuffer != nullptr);

  float* scratch_ptr = static_cast<float*>(
      context->GetScratchBuffer(context, scratch_tensor_index));

  float* output_ptr = tflite::micro::GetTensorData<float>(output);

  // Left shift the activation_state.
  {
    float* new_state_start = state_ptr;
    const float* old_state_start = state_ptr + 1;
    const float* old_state_end =
        state_ptr + batch_size * num_filters * memory_size;
    while (old_state_start != old_state_end) {
      *new_state_start++ = *old_state_start++;
    }
  }

  // Note: no need to clear the latest activation, matmul is not accumulative.

  // Compute conv1d(inputs, weights_feature).
  // The activation_state's rightmost column is used to save current cycle
  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
  // having the stride equal to memory_size.

  // Perform batched matrix vector multiply operation:
  {
    const float* matrix = weights_feature_ptr;
    const float* vector = input_ptr;
    float* result = &state_ptr[memory_size - 1];
    float* result_in_batch = result;
    for (int i = 0; i < batch_size; ++i) {
      const float* matrix_ptr = matrix;
      for (int j = 0; j < num_filters; ++j) {
        float dot_prod = 0.0f;
        const float* vector_in_batch = vector + i * input_size;
        for (int k = 0; k < input_size; ++k) {
          dot_prod += *matrix_ptr++ * *vector_in_batch++;
        }
        *result_in_batch = dot_prod;
        result_in_batch += memory_size;
      }
    }
  }

  ApplyTimeWeightsBiasAndActivation(
      batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr,
      bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr);
}

TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
  TFLITE_DCHECK(node->builtin_data != nullptr);

  const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);

  MicroContext* micro_context = GetMicroContext(context);

  // Validate Tensor Inputs (dtype depends on quantization):
  // [0] = Input, {2, batch_size, input_size}
  // [1] = Weights Feature, {2, num_filters, input_size}
  // [2] = Weights Time, {2, num_filters, memory_size}
  // [3] = Bias (optional), {1, num_units}
  // [4] = Activation State (variable),
  //         {2, batch_size, memory_size * num_filters}
  TfLiteTensor* input =
      micro_context->AllocateTempInputTensor(node, kSvdfInputTensor);
  TF_LITE_ENSURE(context, input != nullptr);
  TfLiteTensor* weights_feature =
      micro_context->AllocateTempInputTensor(node, kSvdfWeightsFeatureTensor);
  TF_LITE_ENSURE(context, weights_feature != nullptr);
  TfLiteTensor* weights_time =
      micro_context->AllocateTempInputTensor(node, kSvdfWeightsTimeTensor);
  TF_LITE_ENSURE(context, weights_time != nullptr);
  TfLiteTensor* bias =
      micro_context->AllocateTempInputTensor(node, kSvdfBiasTensor);
  TfLiteTensor* activation_state = micro_context->AllocateTempInputTensor(
      node, kSvdfInputActivationStateTensor);
  TF_LITE_ENSURE(context, activation_state != nullptr);

  // Define input constants based on input tensor definition above:
  const int rank = params->rank;
  const int input_size = input->dims->data[1];
  const int batch_size = input->dims->data[0];
  const int num_filters = weights_feature->dims->data[0];
  TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
  const int num_units = num_filters / rank;
  const int memory_size = weights_time->dims->data[1];

  // Validate Input Tensor:
  TF_LITE_ENSURE(context,
                 input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);

  // Validate Tensor Output:
  // [0] = float/int8_t, {2, batch_size, num_units}
  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
  TfLiteTensor* output =
      micro_context->AllocateTempOutputTensor(node, kSvdfOutputTensor);
  TF_LITE_ENSURE(context, output != nullptr);
  TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
  TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
  TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);

  // Validate Weights Feature Input Tensor:
  TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2);
  TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size);

  // Validate Weights Time Input Tensor:
  TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2);
  TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters);
  TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);

  // Validate Optional Bias Input Tensor:
  if (bias != nullptr) {
    TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
  }

  // Validate Activation State Input Tensor:
  TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2);
  TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
  TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
                    memory_size * num_filters);
  // Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
  TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);

  TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);

  TFLITE_DCHECK(node->user_data != nullptr);
  OpDataSvdf* data = static_cast<OpDataSvdf*>(node->user_data);

  if (input->type == kTfLiteInt8) {
    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
    TF_LITE_ENSURE(context, (weights_time->type == kTfLiteInt16) ||
                                (weights_time->type == kTfLiteInt8));
    TF_LITE_ENSURE(context, (activation_state->type == kTfLiteInt16) ||
                                (activation_state->type == kTfLiteInt8));
    if (bias != nullptr) {
      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
    }

    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);

    const double effective_scale_1 = static_cast<double>(
        input->params.scale * weights_feature->params.scale /
        activation_state->params.scale);
    const double effective_scale_2 =
        static_cast<double>(activation_state->params.scale *
                            weights_time->params.scale / output->params.scale);

    // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
    TF_LITE_ENSURE(
        context,
        std::abs(static_cast<double>(bias->params.scale) -
                 static_cast<double>(activation_state->params.scale *
                                     weights_time->params.scale)) < 1e-5);

    QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
                       &(data->effective_scale_1_b));
    QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
                       &(data->effective_scale_2_b));

    data->input_zero_point = input->params.zero_point;
    data->output_zero_point = output->params.zero_point;
    data->activation_state_zero_point = activation_state->params.zero_point;

    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);

    const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
        context, batch_size * num_filters * sizeof(int32_t),
        &(data->scratch_tensor_index));
    TF_LITE_ENSURE_OK(context, scratch_status);

    const TfLiteStatus scratch_output_status =
        context->RequestScratchBufferInArena(
            context, batch_size * num_units * sizeof(int32_t),
            &(data->scratch_output_tensor_index));
    TF_LITE_ENSURE_OK(context, scratch_output_status);
  } else {
    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
    TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
    if (bias != nullptr) {
      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
    }
    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);

    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
    const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
        context, batch_size * num_filters * sizeof(float),
        &(data->scratch_tensor_index));
    TF_LITE_ENSURE_OK(context, scratch_status);
  }

  micro_context->DeallocateTempTfLiteTensor(input);
  micro_context->DeallocateTempTfLiteTensor(weights_feature);
  micro_context->DeallocateTempTfLiteTensor(weights_time);
  micro_context->DeallocateTempTfLiteTensor(activation_state);
  micro_context->DeallocateTempTfLiteTensor(output);
  micro_context->DeallocateTempTfLiteTensor(bias);
  return kTfLiteOk;
}

}  // namespace tflite
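A note on the state update used by both the integer and float paths above: the activation state is aged by shifting every element of the flat state buffer one slot to the left, which frees slot memory_size - 1 of each (batch, filter) row for the newest activation written by the feature matmul. A minimal standalone sketch with an invented toy state:

// Minimal sketch of the "left shift the activation_state" step above, using a
// toy state of 1 batch x 2 filters x memory_size 4 (all values invented).
#include <cstdio>

int main() {
  const int n_batch = 1, n_filter = 2, n_memory = 4;
  float state[1 * 2 * 4] = {1, 2, 3, 4,   // filter 0: oldest ... newest
                            5, 6, 7, 8};  // filter 1

  // Same pointer walk as the kernels above: every element of the whole flat
  // buffer moves one slot to the left.
  float* new_state_start = state;
  const float* old_state_start = state + 1;
  const float* old_state_end = state + n_batch * n_filter * n_memory;
  while (old_state_start != old_state_end) {
    *new_state_start++ = *old_state_start++;
  }

  // The last column of each row is now stale; the feature matmul overwrites
  // it next, writing with stride n_memory starting at index n_memory - 1.
  for (int i = 0; i < n_batch * n_filter * n_memory; ++i) {
    std::printf("%g ", state[i]);  // prints: 2 3 4 5 6 7 8 8
  }
  std::printf("\n");
  return 0;
}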
File diff suppressed because it is too large
@@ -1,93 +0,0 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <stddef.h>

#include <cstring>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_resource_variable.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace {

struct OpData {
  int32_t resource_id;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
  const auto* params =
      reinterpret_cast<const TfLiteVarHandleParams*>(node->builtin_data);

  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  MicroGraph& graph_info = micro_context->graph();

  MicroResourceVariables* resources = graph_info.GetResourceVariables();
  if (resources == nullptr) {
    MicroPrintf(
        "VAR_HANDLE requires resource variables. Please create "
        "ResourceVariables and pass it to the interpreter.");
    return kTfLiteError;
  }
  op_data->resource_id =
      resources->CreateIdIfNoneFound(params->container, params->shared_name);
  if (op_data->resource_id < 0) {
    return kTfLiteError;
  }

  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
  TFLITE_DCHECK(output != nullptr);

  // Assign saved resource_id so this output tensor will always return the
  // correct resource id.
  output->data.i32 = &op_data->resource_id;

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);

  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
  TFLITE_DCHECK(output != nullptr);

  // Assign saved resource_id so this output tensor will always return the
  // correct resource id.
  output->data.i32 = &op_data->resource_id;
  return kTfLiteOk;
}

}  // namespace.

TfLiteRegistration Register_VAR_HANDLE() {
  return tflite::micro::RegisterOp(Init, Prepare, Eval);
}

}  // namespace tflite
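The VAR_HANDLE kernel above resolves a (container, shared_name) pair to a small integer id once in Prepare, then publishes that id by pointing the output tensor's data at the value stored in persistent op data. A toy registry sketch of the lookup-or-create step (the class, capacity, and names here are invented for illustration; the real MicroResourceVariables implementation differs):

// Toy sketch of lookup-or-create resource-id resolution, conceptually
// mirroring CreateIdIfNoneFound. Fixed capacity and names are invented.
#include <cstdio>
#include <cstring>

struct Registry {
  static constexpr int kCapacity = 8;
  char names[kCapacity][32];
  int count = 0;

  // Returns the existing id for `name`, or assigns the next free one.
  // Returns -1 when full, matching the `resource_id < 0` error check above.
  int CreateIdIfNoneFound(const char* name) {
    for (int i = 0; i < count; ++i) {
      if (std::strcmp(names[i], name) == 0) return i;
    }
    if (count >= kCapacity) return -1;
    std::snprintf(names[count], sizeof(names[count]), "%s", name);
    return count++;
  }
};

int main() {
  Registry registry;
  std::printf("%d\n", registry.CreateIdIfNoneFound("c/var_a"));  // 0
  std::printf("%d\n", registry.CreateIdIfNoneFound("c/var_b"));  // 1
  std::printf("%d\n", registry.CreateIdIfNoneFound("c/var_a"));  // 0 again
  return 0;
}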
@@ -1,133 +0,0 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <stddef.h>

#include <cstring>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_context.h"
#include "tensorflow/lite/micro/micro_graph.h"
#include "tensorflow/lite/micro/micro_utils.h"

namespace tflite {

namespace {

struct OpData {
  int cond_subgraph_index;
  int body_subgraph_index;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
  const auto* params =
      reinterpret_cast<const TfLiteWhileParams*>(node->builtin_data);

  op_data->cond_subgraph_index = params->cond_subgraph_index;
  op_data->body_subgraph_index = params->body_subgraph_index;

  // The first input is the condition.
  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);

  size_t num_inputs = node->inputs->size;
  size_t num_outputs = node->outputs->size;

  MicroGraph& graph_info = micro_context->graph();

  TF_LITE_ENSURE(context,
                 op_data->cond_subgraph_index < graph_info.NumSubgraphs());
  TF_LITE_ENSURE(context,
                 op_data->body_subgraph_index < graph_info.NumSubgraphs());

  TF_LITE_ENSURE_EQ(context, num_inputs,
                    graph_info.NumSubgraphInputs(op_data->cond_subgraph_index));
  TF_LITE_ENSURE_EQ(context, num_inputs,
                    graph_info.NumSubgraphInputs(op_data->body_subgraph_index));
  TF_LITE_ENSURE_EQ(context, num_inputs, num_outputs);
  TF_LITE_ENSURE_EQ(
      context, num_outputs,
      graph_info.NumSubgraphOutputs(op_data->body_subgraph_index));

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);

  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
  MicroGraph* graph_info = &micro_context->graph();

  TF_LITE_ENSURE_OK(context,
                    tflite::micro::CopyOpInputsToSubgraphInputs(
                        context, node, graph_info, op_data->cond_subgraph_index,
                        /*first_tensor_idx=*/0));

  TF_LITE_ENSURE_OK(context,
                    graph_info->InvokeSubgraph(op_data->cond_subgraph_index));

  TfLiteEvalTensor* cond_subgraph_output = graph_info->GetSubgraphOutput(
      op_data->cond_subgraph_index, /*tensor_idx=*/0);
  bool cond_value = cond_subgraph_output->data.b[0];

  TF_LITE_ENSURE_OK(context,
                    tflite::micro::CopyOpInputsToSubgraphInputs(
                        context, node, graph_info, op_data->body_subgraph_index,
                        /*first_tensor_idx=*/0));
  TF_LITE_ENSURE_OK(context,
                    tflite::micro::CopyOpInputsToOpOutputs(context, node));

  while (cond_value == true) {
    // Copy output of this iteration back to the body input.
    TF_LITE_ENSURE_OK(
        context, tflite::micro::CopyOpOutputsToSubgraphInputs(
                     context, node, graph_info, op_data->body_subgraph_index));
    TF_LITE_ENSURE_OK(context,
                      graph_info->InvokeSubgraph(op_data->body_subgraph_index));

    TF_LITE_ENSURE_OK(
        context, tflite::micro::CopySubgraphOutputsToOpOutputs(
                     context, node, graph_info, op_data->body_subgraph_index));
    TF_LITE_ENSURE_OK(
        context, tflite::micro::CopyOpOutputsToSubgraphInputs(
                     context, node, graph_info, op_data->cond_subgraph_index));
    TF_LITE_ENSURE_OK(context,
                      graph_info->InvokeSubgraph(op_data->cond_subgraph_index));

    cond_subgraph_output = graph_info->GetSubgraphOutput(
        op_data->cond_subgraph_index, /*tensor_idx=*/0);
    cond_value = cond_subgraph_output->data.b[0];
  }

  return kTfLiteOk;
}

}  // namespace.

TfLiteRegistration Register_WHILE() {
  return tflite::micro::RegisterOp(Init, Prepare, Eval);
}

}  // namespace tflite
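Stripped of the tensor copying, Eval above is an ordinary while loop over two callable subgraphs: test the condition on the op inputs once, and while it holds, feed the previous outputs through the body and re-test. A control-flow sketch with invented lambdas standing in for InvokeSubgraph:

// Control-flow sketch of the WHILE kernel above, with std::function stand-ins
// for the cond and body subgraphs (the lambdas and values are invented).
#include <cstdio>
#include <functional>

int main() {
  int state = 0;  // stands in for the tensors copied between op and subgraphs

  std::function<bool(int)> cond = [](int s) { return s < 3; };
  std::function<int(int)> body = [](int s) { return s + 1; };

  // CopyOpInputsToSubgraphInputs + InvokeSubgraph(cond_subgraph_index):
  bool cond_value = cond(state);

  // CopyOpInputsToOpOutputs: the outputs start as a copy of the inputs.
  int output = state;

  while (cond_value) {
    // CopyOpOutputsToSubgraphInputs + InvokeSubgraph(body_subgraph_index)
    // + CopySubgraphOutputsToOpOutputs:
    output = body(output);
    // Re-test the condition on the new outputs.
    cond_value = cond(output);
  }

  std::printf("%d\n", output);  // prints 3
  return 0;
}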
@@ -1,170 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/memory_helpers.h"

#include <cstddef>
#include <cstdint>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

uint8_t* AlignPointerUp(uint8_t* data, size_t alignment) {
  std::uintptr_t data_as_uintptr_t = reinterpret_cast<std::uintptr_t>(data);
  uint8_t* aligned_result = reinterpret_cast<uint8_t*>(
      ((data_as_uintptr_t + (alignment - 1)) / alignment) * alignment);
  return aligned_result;
}

uint8_t* AlignPointerDown(uint8_t* data, size_t alignment) {
  std::uintptr_t data_as_uintptr_t = reinterpret_cast<std::uintptr_t>(data);
  uint8_t* aligned_result =
      reinterpret_cast<uint8_t*>((data_as_uintptr_t / alignment) * alignment);
  return aligned_result;
}

size_t AlignSizeUp(size_t size, size_t alignment) {
  size_t aligned_size = (((size + (alignment - 1)) / alignment) * alignment);
  return aligned_size;
}

TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
  switch (type) {
    case kTfLiteFloat16:
      *size = sizeof(int16_t);
      break;
    case kTfLiteFloat32:
      *size = sizeof(float);
      break;
    case kTfLiteFloat64:
      *size = sizeof(double);
      break;
    case kTfLiteInt16:
      *size = sizeof(int16_t);
      break;
    case kTfLiteInt32:
      *size = sizeof(int32_t);
      break;
    case kTfLiteUInt32:
      *size = sizeof(uint32_t);
      break;
    case kTfLiteUInt8:
      *size = sizeof(uint8_t);
      break;
    case kTfLiteInt8:
      *size = sizeof(int8_t);
      break;
    case kTfLiteInt64:
      *size = sizeof(int64_t);
      break;
    case kTfLiteUInt64:
      *size = sizeof(uint64_t);
      break;
    case kTfLiteBool:
      *size = sizeof(bool);
      break;
    case kTfLiteResource:
      *size = sizeof(int32_t);
      break;
    case kTfLiteComplex64:
      *size = sizeof(float) * 2;
      break;
    case kTfLiteComplex128:
      *size = sizeof(double) * 2;
      break;
    default:
      return kTfLiteError;
  }
  return kTfLiteOk;
}

TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
                                    size_t* bytes, size_t* type_size,
                                    ErrorReporter* error_reporter) {
  int element_count = 1;
  // If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar
  // so has 1 element.
  if (flatbuffer_tensor.shape() != nullptr) {
    for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) {
      element_count *= flatbuffer_tensor.shape()->Get(n);
    }
  }

  TfLiteType tf_lite_type;
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &tf_lite_type, error_reporter));
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size));
  *bytes = element_count * (*type_size);
  return kTfLiteOk;
}

TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
                                        size_t* out_bytes) {
  TFLITE_DCHECK(out_bytes != nullptr);

  int element_count = 1;
  // If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element.
  if (eval_tensor->dims != nullptr) {
    for (int n = 0; n < eval_tensor->dims->size; ++n) {
      element_count *= eval_tensor->dims->data[n];
    }
  }
  size_t type_size;
  TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size));
  *out_bytes = element_count * type_size;
  return kTfLiteOk;
}

TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
                                               const TfLiteTensor* input1,
                                               const TfLiteTensor* input2,
                                               TfLiteTensor* output) {
  const TfLiteTensor* input = nullptr;

  TF_LITE_ENSURE(context, input1->dims != nullptr);
  TF_LITE_ENSURE(context, input2->dims != nullptr);
  TF_LITE_ENSURE(context, output->dims->size == 0);

  input = input1->dims->size > input2->dims->size ? input1 : input2;
  TF_LITE_ENSURE(context, output->type == input->type);

  size_t size = 0;
  TfLiteTypeSizeOf(input->type, &size);
  const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount();
  for (int i = 0; i < dimensions_count; i++) {
    size *= input->dims->data[i];
  }

  output->bytes = size;

  output->dims =
      reinterpret_cast<TfLiteIntArray*>(context->AllocatePersistentBuffer(
          context, TfLiteIntArrayGetSizeInBytes(size)));

  output->dims->size = input->dims->size;
  for (int i = 0; i < dimensions_count; i++) {
    output->dims->data[i] = input->dims->data[i];
  }

  return kTfLiteOk;
}

}  // namespace tflite
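The alignment helpers above all rely on the same round-up/round-down integer trick; a quick standalone check of that arithmetic (example values invented):

// Quick check of the round-up / round-down arithmetic used by
// AlignPointerUp / AlignPointerDown / AlignSizeUp (example values invented).
#include <cstddef>
#include <cstdio>

size_t AlignSizeUp(size_t size, size_t alignment) {
  return ((size + (alignment - 1)) / alignment) * alignment;
}

size_t AlignSizeDown(size_t size, size_t alignment) {
  return (size / alignment) * alignment;
}

int main() {
  std::printf("%zu\n", AlignSizeUp(13, 4));    // 16
  std::printf("%zu\n", AlignSizeUp(16, 4));    // 16 (already aligned)
  std::printf("%zu\n", AlignSizeDown(13, 4));  // 12
  return 0;
}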
@@ -1,59 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_

#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

// Returns the next pointer address aligned to the given alignment.
uint8_t* AlignPointerUp(uint8_t* data, size_t alignment);

// Returns the previous pointer address aligned to the given alignment.
uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);

// Returns an increased size that's a multiple of alignment.
size_t AlignSizeUp(size_t size, size_t alignment);

// Returns size in bytes for a given TfLiteType.
TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);

// How many bytes are needed to hold a tensor's contents.
TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
                                    size_t* bytes, size_t* type_size,
                                    ErrorReporter* error_reporter);

// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
                                        size_t* out_bytes);

// Deduce output dimensions from input and allocate given size.
// Useful for operators with two inputs where the largest input should equal
// the output dimension.
TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
                                               const TfLiteTensor* input1,
                                               const TfLiteTensor* input2,
                                               TfLiteTensor* output);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_MEMORY_HELPERS_H_
@@ -1,452 +0,0 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
|
||||
|
||||
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
||||
#include "tensorflow/lite/micro/micro_string.h"
|
||||
|
||||
namespace tflite {
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns a character representing a numbered buffer
|
||||
// for GreedyMemoryPlanner::PrintMemoryPlan()
|
||||
char GetOrdinalCharacter(int i) {
|
||||
if (i < 10) {
|
||||
return '0' + i;
|
||||
} else if (i < 36) {
|
||||
return 'a' + (i - 10);
|
||||
} else if (i < 62) {
|
||||
return 'A' + (i - 36);
|
||||
}
|
||||
return '*';
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Simple stable in-place sort function. Not time-efficient for large arrays.
|
||||
// Would normally be in an anonymous namespace to keep it private, but we want
|
||||
// to be able to test it externally.
|
||||
void ReverseSortInPlace(int* values, int* ids, int size) {
|
||||
bool any_swapped;
|
||||
do {
|
||||
any_swapped = false;
|
||||
for (int i = 1; i < size; ++i) {
|
||||
if (values[i - 1] < values[i]) {
|
||||
const int value_temp = values[i - 1];
|
||||
values[i - 1] = values[i];
|
||||
values[i] = value_temp;
|
||||
const int id_temp = ids[i - 1];
|
||||
ids[i - 1] = ids[i];
|
||||
ids[i] = id_temp;
|
||||
any_swapped = true;
|
||||
}
|
||||
}
|
||||
} while (any_swapped);
|
||||
}
|
||||
|
||||
GreedyMemoryPlanner::GreedyMemoryPlanner() {}
|
||||
|
||||
TfLiteStatus GreedyMemoryPlanner::Init(unsigned char* scratch_buffer,
|
||||
int scratch_buffer_size) {
|
||||
// Reset internal states
|
||||
buffer_count_ = 0;
|
||||
need_to_calculate_offsets_ = true;
|
||||
|
||||
// Allocate the arrays we need within the scratch buffer arena.
|
||||
max_buffer_count_ = scratch_buffer_size / per_buffer_size();
|
||||
|
||||
unsigned char* next_free = scratch_buffer;
|
||||
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
|
||||
next_free += sizeof(BufferRequirements) * max_buffer_count_;
|
||||
|
||||
buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
|
||||
next_free += sizeof(int) * max_buffer_count_;
|
||||
|
||||
buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
|
||||
next_free += sizeof(int) * max_buffer_count_;
|
||||
|
||||
buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
|
||||
next_free += sizeof(ListEntry) * max_buffer_count_;
|
||||
|
||||
buffer_offsets_ = reinterpret_cast<int*>(next_free);
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
GreedyMemoryPlanner::~GreedyMemoryPlanner() {
|
||||
// We don't own the scratch buffer, so don't deallocate anything.
|
||||
}
|
||||
|
||||
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
|
||||
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
|
||||
int last_time_used) {
|
||||
if (buffer_count_ >= max_buffer_count_) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
|
||||
max_buffer_count_);
|
||||
return kTfLiteError;
|
||||
}
|
||||
BufferRequirements* current = &requirements_[buffer_count_];
|
||||
current->size = size;
|
||||
current->first_time_used = first_time_used;
|
||||
current->last_time_used = last_time_used;
|
||||
current->offline_offset = kOnlinePlannedBuffer;
|
||||
++buffer_count_;
|
||||
need_to_calculate_offsets_ = true;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
|
||||
tflite::ErrorReporter* error_reporter, int size, int first_time_used,
|
||||
int last_time_used, int offline_offset) {
|
||||
BufferRequirements* current = &requirements_[buffer_count_];
|
||||
if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
|
||||
kTfLiteOk) {
|
||||
return kTfLiteError;
|
||||
}
|
||||
current->offline_offset = offline_offset;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
|
||||
const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
|
||||
const int last_time_used) const {
|
||||
const BufferRequirements* entry_requirements =
|
||||
&requirements_[entry->requirements_index];
|
||||
if (entry_requirements->first_time_used > last_time_used) {
|
||||
return false;
|
||||
}
|
||||
if (first_time_used > entry_requirements->last_time_used) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
GreedyMemoryPlanner::ListEntry*
|
||||
GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
|
||||
const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
|
||||
const int last_time_used) {
|
||||
ListEntry* result = nullptr;
|
||||
ListEntry* candidate_next_entry;
|
||||
if (start == nullptr) {
|
||||
candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
|
||||
} else {
|
||||
if (start->next_entry_index == -1) {
|
||||
return nullptr;
|
||||
}
|
||||
candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
|
||||
}
|
||||
do {
|
||||
if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
|
||||
last_time_used)) {
|
||||
result = candidate_next_entry;
|
||||
break;
|
||||
}
|
||||
if (candidate_next_entry->next_entry_index == -1) {
|
||||
break;
|
||||
}
|
||||
candidate_next_entry =
|
||||
&buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
|
||||
} while (true);
|
||||
return result;
|
||||
}

void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
  if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
    return;
  }
  need_to_calculate_offsets_ = false;

  // Start off by ordering the buffers in descending order of size.
  // This helps find a more compact layout. Intuitively, you can think
  // about putting the large buffers in place first, and then the
  // smaller buffers can fit in the gaps, rather than fragmenting the
  // gaps with small buffers at the beginning. Add offline planned offsets
  // first in the list, since they have a predetermined offset.
  int idx_from_tail = buffer_count_;
  int idx_from_head = 0;
  for (int i = 0; i < buffer_count_; ++i) {
    if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
      idx_from_tail--;
      buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
      buffer_ids_sorted_[idx_from_tail] = i;
      buffer_offsets_[i] = -1;
    } else {
      buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
      buffer_ids_sorted_[idx_from_head] = i;
      buffer_offsets_[i] = requirements_[i].offline_offset;
      idx_from_head++;
    }
  }

  // This sorting algorithm is naive, and may end up taking a very long time
  // with hundreds of buffers. Do not sort the offline planned offsets.
  ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
                     &buffer_ids_sorted_[idx_from_head],
                     buffer_count_ - idx_from_head);

  // Initialize the first entry to the first buffer in buffer_ids_sorted_.
  //   - If there are no offline planned offsets, the largest buffer will be
  //     first, and the buffers will be handled in size order.
  //   - If offline offsets are present, these will be handled first in order
  //     for the greedy algorithm to utilize gaps in the offline plan.
  first_entry_index_ = 0;
  next_free_entry_ = 1;
  ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
  first_entry->next_entry_index = -1;  // to mark the entry as end of list
  int buffer_id = buffer_ids_sorted_[0];
  first_entry->requirements_index = buffer_id;
  if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
    buffer_offsets_[buffer_id] = 0;
  }
  first_entry->offset = buffer_offsets_[buffer_id];

  // Work through the rest of the buffers to find a good gap to place each one.
  for (int i = 1; i < buffer_count_; ++i) {
    // The id is the order the buffer was originally added by the client.
    buffer_id = buffer_ids_sorted_[i];
    // Look at what size and time range the buffer needs to be active.
    BufferRequirements* wanted_requirements = &requirements_[buffer_id];
    const int wanted_size = wanted_requirements->size;
    const int wanted_first_time_used = wanted_requirements->first_time_used;
    const int wanted_last_time_used = wanted_requirements->last_time_used;

    // Find the first buffer that's active in our time range. All placed
    // buffers are stored in the order of their starting position in the arena
    // so that it's easy to find the next buffer in memory, and hence the size
    // of the gap. The prior_entry variable holds the buffer that we're
    // considering placing the current buffer after.

    int candidate_offset = 0;
    // Loop through the offset-ordered list of buffers, looking for gaps.
    if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
      ListEntry* prior_entry = nullptr;
      while (true) {
        // Find out what the next active buffer is.
        ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
            prior_entry, wanted_first_time_used, wanted_last_time_used);

        if (prior_entry) {
          BufferRequirements* candidate_requirements =
              &requirements_[prior_entry->requirements_index];
          const int prior_entry_offset =
              prior_entry->offset + candidate_requirements->size;
          if (prior_entry_offset > candidate_offset) {
            candidate_offset = prior_entry_offset;
          }
        }
        if (next_entry == nullptr) {
          // We're at the end of the list, so we can always append the buffer
          // here.
          break;
        }
        // Find out how much space there is between us and the next buffer.
        const int gap = next_entry->offset - candidate_offset;
        if (gap >= wanted_size) {
          // This entry has a big enough gap between it and the next, so
          // use it!
          break;
        }
        // The gap wasn't big enough, so move on to another candidate.
        prior_entry = next_entry;
      }
    } else {
      // Offline planned offsets are considered constant.
      candidate_offset = wanted_requirements->offline_offset;
    }
    // At this point, we've either found a gap (possibly at the end of the
    // list) and want to place the buffer there, or there are no other active
    // buffers in this time range and so we can put it at offset zero.
    // Record the buffer's offset in our plan.
    buffer_offsets_[buffer_id] = candidate_offset;
    // Add the newly-placed buffer to our offset-ordered list, so that
    // subsequent passes can fit in their buffers around it.
    ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
    new_entry->offset = candidate_offset;
    new_entry->requirements_index = buffer_id;
    const int new_entry_index = next_free_entry_;
    ++next_free_entry_;

    if (first_entry->offset > candidate_offset) {
      // The new entry offset is smaller than the first entry offset =>
      // replace the first entry.
      first_entry = new_entry;
      first_entry->next_entry_index = first_entry_index_;
      first_entry_index_ = new_entry_index;
    } else {
      ListEntry* current_entry = first_entry;
      // Make sure that we insert the buffer at the correct place in the
      // buffer-offset-ordered list.
      while (true) {
        const int next_entry_index = current_entry->next_entry_index;
        if (next_entry_index == -1) {
          // We're at the end of the list, so just add the new entry here.
          current_entry->next_entry_index = new_entry_index;
          new_entry->next_entry_index = -1;
          break;
        }
        // Not at the end of the list -> take a look at the next entry.
        ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
        if (next_entry->offset > candidate_offset) {
          // We're at the right spot to do an insertion and retain the sorting
          // order, so place the new entry here.
          new_entry->next_entry_index = current_entry->next_entry_index;
          current_entry->next_entry_index = new_entry_index;
          break;
        }
        current_entry = next_entry;
      }
    }
  }
}
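
// Worked example (added for exposition): three online-planned buffers,
// A (size 20, live t1..t3), B (size 10, live t0..t2) and C (size 5, live
// t3 only). Sorted by size the order is A, B, C. A is placed at offset 0;
// B overlaps A in time and no gap before A's end is big enough, so B lands
// at offset 20. C overlaps A but not B, so after walking past A no
// simultaneously active buffer remains and C reuses B's space at offset 20.
// The high-water mark is 30 bytes instead of the 35 a time-blind layout
// would need.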

size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
  CalculateOffsetsIfNeeded();
  if (buffer_count_ == 0) {
    return 0;
  }
  ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
  size_t max_size = 0;
  while (entry) {
    BufferRequirements* requirements =
        &requirements_[entry->requirements_index];
    const size_t current_size = entry->offset + requirements->size;
    if (current_size > max_size) {
      max_size = current_size;
    }
    if (entry->next_entry_index == -1) {
      break;
    }
    entry = &buffers_sorted_by_offset_[entry->next_entry_index];
  }
  return max_size;
}
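
// Usage sketch (added for exposition): the returned high-water mark is the
// minimum arena size the client needs to allocate for the planned buffers.
//
//   const size_t arena_bytes_needed = planner.GetMaximumMemorySize();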

void GreedyMemoryPlanner::PrintMemoryPlan() {
  CalculateOffsetsIfNeeded();

  for (int i = 0; i < buffer_count_; ++i) {
    MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
                GetOrdinalCharacter(i), i, requirements_[i].size,
                buffer_offsets_[i], requirements_[i].first_time_used,
                requirements_[i].last_time_used);
  }

  constexpr int kLineWidth = 80;
  int max_size = kLineWidth;
  int max_time = 0;
  for (int i = 0; i < buffer_count_; ++i) {
    BufferRequirements* requirements = &requirements_[i];
    const int offset = buffer_offsets_[i];
    const int last_time_used = requirements->last_time_used;
    const int size = offset + requirements->size;
    if (size > max_size) {
      max_size = size;
    }
    if (last_time_used > max_time) {
      max_time = last_time_used;
    }
  }

  char line[kLineWidth + 1];
  for (int t = 0; t <= max_time; ++t) {
    for (int c = 0; c < kLineWidth; ++c) {
      line[c] = '.';
    }
    int memory_use = 0;
    for (int i = 0; i < buffer_count_; ++i) {
      BufferRequirements* requirements = &requirements_[i];
      if ((t < requirements->first_time_used) ||
          (t > requirements->last_time_used)) {
        continue;
      }
      const int offset = buffer_offsets_[i];
      if (offset == -1) {
        continue;
      }
      const int size = requirements->size;
      memory_use += size;
      const int line_start = (offset * kLineWidth) / max_size;
      const int line_end = ((offset + size) * kLineWidth) / max_size;
      for (int n = line_start; n < line_end; ++n) {
        if (line[n] == '.') {
          line[n] = GetOrdinalCharacter(i);
        } else {
          line[n] = '!';
        }
      }
    }
    line[kLineWidth] = 0;

    MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line,
                (memory_use + 1023) / 1024);
  }
}
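
// Shape of the chart this prints (illustrative sketch, not captured output):
// one row per time step, one letter per live buffer, '!' marking collisions,
// and the rounded-up kilobytes in use at that step on the right.
//
//    0: aaaaaaaaaaaa.................... (1k)
//    1: aaaaaaaaaaaabbbbbb.............. (2k)
//    2: ............bbbbbb.............. (1k)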

int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }

TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
    tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
  CalculateOffsetsIfNeeded();
  if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "buffer index %d is outside range 0 to %d",
                         buffer_index, buffer_count_);
    return kTfLiteError;
  }
  *offset = buffer_offsets_[buffer_index];
  return kTfLiteOk;
}
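
// Usage sketch (added for exposition): resolving a planned offset into a
// pointer inside a client-owned arena; arena and reporter are assumptions
// of the example.
//
//   int offset = 0;
//   if (planner.GetOffsetForBuffer(&reporter, /*buffer_index=*/0, &offset) ==
//       kTfLiteOk) {
//     uint8_t* buffer_start = arena + offset;
//   }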

bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
  CalculateOffsetsIfNeeded();
  bool were_overlaps_found = false;
  for (int i = 0; i < buffer_count_; ++i) {
    BufferRequirements* a_requirements = &requirements_[i];
    const int a_start_offset = buffer_offsets_[i];
    const int a_first_time_used = a_requirements->first_time_used;
    const int a_last_time_used = a_requirements->last_time_used;
    const int a_end_offset = a_start_offset + a_requirements->size;
    for (int j = 0; j < buffer_count_; ++j) {
      if (i == j) {
        continue;
      }
      BufferRequirements* b_requirements = &requirements_[j];
      const int b_start_offset = buffer_offsets_[j];
      const int b_first_time_used = b_requirements->first_time_used;
      const int b_last_time_used = b_requirements->last_time_used;
      const int b_end_offset = b_start_offset + b_requirements->size;
      if ((a_first_time_used > b_last_time_used) ||
          (b_first_time_used > a_last_time_used)) {
        // Buffers don't overlap in time.
        continue;
      }
      if ((a_start_offset >= b_end_offset) ||
          (b_start_offset >= a_end_offset)) {
        // No overlap in memory.
        continue;
      }
      were_overlaps_found = true;
      TF_LITE_REPORT_ERROR(
          error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
          i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
          j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
    }
  }
  return were_overlaps_found;
}
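
// Test sketch (added for exposition): a correct plan should report no
// overlaps. The check is O(N^2), so it is meant for tests, not production.
//
//   TFLITE_DCHECK(!planner.DoAnyBuffersOverlap(&reporter));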

}  // namespace tflite
@@ -1,167 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_

#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_planner/micro_memory_planner.h"

namespace tflite {

constexpr int kOnlinePlannedBuffer = -1;

// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
// The algorithm works like this:
//  - The client enters the buffer information through AddBuffer().
//  - When a function like GetOffsetForBuffer() is called, the
//    CalculateOffsetsIfNeeded() method is invoked.
//  - If an up to date plan is not already present, one will be calculated.
//  - The buffers are sorted in descending order of size.
//  - The largest buffer is placed at offset zero.
//  - The rest of the buffers are looped through in descending size order.
//  - The other buffers that need to be in memory at the same time are found.
//  - The first gap between simultaneously active buffers that the current
//    buffer fits into will be used.
//  - If no large-enough gap is found, the current buffer is placed after the
//    last buffer that's simultaneously active.
//  - This continues until all buffers are placed, and the offsets stored.
//
// This is not guaranteed to produce the best placement, since that's an
// NP-Complete problem, but in practice it should produce one that's decent.
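//
// Worked example (added for exposition): with an offline-planned buffer
// A (size 16, offset 0, live t0..t1) and online-planned buffers
// B (size 32, live t0..t2) and C (size 8, live t0..t1), A keeps its fixed
// offset 0, B is placed greedily into the first large-enough gap, which is
// right after A at offset 16, and C finds no 8-byte gap before B, so it
// lands after B at offset 48, for a 56-byte high-water mark.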
class GreedyMemoryPlanner : public MicroMemoryPlanner {
 public:
  GreedyMemoryPlanner();
  ~GreedyMemoryPlanner() override;

  // You need to pass in an area of memory to be used for planning. The client
  // should ensure the validity of the memory when it needs to use this object.
  // This memory isn't owned by this object, so management should be handled by
  // the client. This is so it can be stack or globally allocated if necessary
  // on devices without dynamic memory allocation. How many buffers can be
  // planned for will depend on the size of this scratch memory, so you should
  // enlarge it if you see an error when calling AddBuffer(). The memory can be
  // reused once you're done with the planner, as long as you copy the
  // calculated offsets to another location. Each buffer requires about 40
  // bytes of scratch with 4-byte ints; see per_buffer_size() for the exact
  // figure.
  TfLiteStatus Init(unsigned char* scratch_buffer,
                    int scratch_buffer_size) override;

  // Record details of a buffer we want to place.
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used) override;

  // Record details of an offline planned buffer offset we want to place.
  // offline_offset is the buffer offset from the start of the arena.
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used,
                         int offline_offset) override;

  // Returns the high-water mark of used memory. This is the minimum size of a
  // memory arena you'd need to allocate to hold these buffers.
  size_t GetMaximumMemorySize() override;

  // How many buffers have been recorded.
  int GetBufferCount() override;

  // Where a given buffer should be placed in the memory arena.
  // This information is stored in the memory arena itself, so once the arena
  // is used for inference, it will be overwritten.
  TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter,
                                  int buffer_index, int* offset) override;

  // Prints an ascii-art diagram of the buffer layout plan.
  void PrintMemoryPlan() override;

  // Debug method to check whether any buffer allocations are overlapping. This
  // is an O(N^2) complexity operation, so only use for testing.
  bool DoAnyBuffersOverlap(ErrorReporter* error_reporter);

  // Used to store a list of buffers ordered by their offset.
  struct ListEntry {
    int offset;
    int requirements_index;
    int next_entry_index;
  };

  // Number of bytes required in order to plan a buffer.
  static size_t per_buffer_size() {
    const int per_buffer_size =
        sizeof(BufferRequirements) +  // requirements_
        sizeof(int) +                 // buffer_sizes_sorted_
        sizeof(int) +                 // buffer_ids_sorted_
        sizeof(ListEntry) +           // buffers_sorted_by_offset_
        sizeof(int);                  // buffer_offsets_;
    return per_buffer_size;
  }
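
  // Example (added for exposition): with 4-byte ints this comes to
  // 16 (BufferRequirements) + 4 + 4 + 12 (ListEntry) + 4 = 40 bytes, so a
  // 1024-byte scratch buffer can plan for 25 buffers.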

 private:
  // Whether a buffer is active in a given time range.
  bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
                              const int last_time_used) const;

  // Walks the list to return the next buffer that is active in a given time
  // range, or a null pointer if there are none.
  ListEntry* NextSimultaneouslyActiveBuffer(const ListEntry* start,
                                            const int first_time_used,
                                            const int last_time_used);

  // If there isn't an up to date plan, calculate a new one.
  void CalculateOffsetsIfNeeded();

  // How many buffers we can plan for, based on the scratch memory size we're
  // given in Init().
  int max_buffer_count_;

  // The number of buffers added so far.
  int buffer_count_;

  // Records the client-provided information about each buffer.
  struct BufferRequirements {
    int size;
    int offline_offset;
    int first_time_used;
    int last_time_used;
  };

  // Working arrays used during the layout algorithm.
  BufferRequirements* requirements_;
  // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
  // {
  //   offline planned buffers,
  //   online planned buffers sorted by size
  // }
  int* buffer_sizes_sorted_;
  int* buffer_ids_sorted_;
  ListEntry* buffers_sorted_by_offset_;
  int next_free_entry_;    // Index of the next free entry of
                           // buffers_sorted_by_offset_
  int first_entry_index_;  // Index of the first entry (smallest offset) of
                           // buffers_sorted_by_offset_

  // Stores the outcome of the plan, the location of each buffer in the arena.
  int* buffer_offsets_;

  // Whether buffers have been added since the last plan was calculated.
  bool need_to_calculate_offsets_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite

#endif  // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_