Compare commits

...

43 Commits

Author SHA1 Message Date
jomjol
18f6e83a2c v8.4.0 2021-09-25 18:57:40 +02:00
jomjol
147d97421b Merge branch 'rolling' 2021-09-25 18:53:47 +02:00
jomjol
dcf2feb7aa v8.4.0 2021-09-25 18:53:14 +02:00
jomjol
e63e940b96 v8.4.0 2021-09-25 08:08:21 +02:00
jomjol
68b0fb83ee v8.4.0 2021-09-24 19:57:48 +02:00
jomjol
f15e5f060a v8.4.0 2021-09-23 18:43:53 +02:00
jomjol
e2a403441f Rolling 20210922 2021-09-22 22:13:08 +02:00
jomjol
9b3665b9c6 Rolling 20210921 v2 2021-09-21 19:41:20 +02:00
jomjol
f4c8bf9206 Rolling 20210921 2021-09-21 18:49:32 +02:00
jomjol
c033db9c31 Rolling 20210921 2021-09-21 07:27:46 +02:00
jomjol
9300526f49 Rolling 2021-09-20 2021-09-20 21:18:34 +02:00
jomjol
b6dd1f7f2d Update 2021-09-14 20:00:45 +02:00
jomjol
1e6eddca04 Rolling 20210913 2021-09-13 20:05:54 +02:00
jomjol
19ca0d7dd7 Update Versioninfo 2021-09-12 07:33:30 +02:00
jomjol
7fcb5d1c0c v8.3.0 2021-09-12 07:29:30 +02:00
jomjol
dd995ec28a Rolling 20210910 2021-09-10 09:26:52 +02:00
jomjol
af99de3535 IgnoreLeadingNaN 2021-09-02 11:04:01 +02:00
jomjol
3567cc2fb0 Merge pull request #330 from pixeldoc2000/pixeldoc2000-patch-1
Pixeldoc2000 patch 1
2021-09-02 11:00:49 +02:00
pixel::doc
5e9d9bd264 Update edit_config_param.html
Fixed some more Text.
2021-09-02 10:09:36 +02:00
pixel::doc
62447c1bb9 Update edit_config_param.html
Fixed some Text
2021-09-02 00:23:59 +02:00
jomjol
a86434c9a2 Rolling 20210831 2021-08-31 11:40:29 +02:00
jomjol
b7b70299f7 Rolling 20210830 2021-08-30 21:21:18 +02:00
jomjol
eb02e0aec1 new images 2021-08-29 20:57:21 +02:00
jomjol
7816e53db7 v8.2.0 2021-08-24 08:37:18 +02:00
jomjol
7ae08e572a Merge branch 'rolling' 2021-08-24 08:34:32 +02:00
jomjol
47d15d8adb v8.2.0 2021-08-24 08:33:56 +02:00
jomjol
0dac0e87e4 rolling 20210823 2021-08-23 18:57:48 +02:00
jomjol
b290099d5b v12.0.0 2021-08-12 07:25:12 +02:00
jomjol
f6b1a41a0b v12.0.0 2021-08-12 07:20:58 +02:00
jomjol
e529af04cf Update FeatureRequest.md 2021-08-10 20:19:05 +02:00
jomjol
6c365dd949 rolling v20210809 2021-08-09 21:53:07 +02:00
jomjol
32f15fc557 rolling 20210708 2021-08-07 15:25:27 +02:00
jomjol
6f06af1d5f Update README.md 2021-08-01 21:52:00 +02:00
jomjol
a91f99faab update 2021-08-01 21:49:29 +02:00
jomjol
17a87b23a1 v8.0.5 2021-08-01 21:46:17 +02:00
jomjol
d4b5ec2ae2 v8.0.4 2021-07-29 20:18:53 +02:00
jomjol
1bcaf09855 v8.0.4 2021-07-29 20:14:36 +02:00
jomjol
fa3842b2b4 v8.0.3 2021-07-25 18:15:35 +02:00
jomjol
ea72256e56 Merge branch 'rolling' 2021-07-25 18:08:43 +02:00
jomjol
be5828cb3e v8.0.3 2021-07-25 18:07:50 +02:00
jomjol
104b72505c Rolling - 20210725 2021-07-25 13:44:22 +02:00
jomjol
23728a0686 Update README.md 2021-07-23 21:02:19 +02:00
jomjol
eaaa856b13 Update README.md 2021-07-23 21:01:50 +02:00
209 changed files with 13233 additions and 3817 deletions

View File

@@ -2,6 +2,36 @@
##### 7.1.2 MQTT-Update - (2021-06-17)
* NEW: 7.1.2: bug fix setting hostname, Flash-LED not off during reboot
* NEW: 7.1.1: bug fix wlan password with "=" (again)
* MQTT error message: changes "no error", send retain flag
* Update wlan handling to esp-idf 4.1
* Upgrade digital CNN to v8.7.0 (added new images)
* Bug fix: MQTT, WLAN, LED-Controll, GPIO usage, fixed IP, calculation flow rate
##### 7.0.1 MQTT-Update - (2021-05-13)
* NEW: 7.0.1: bug fix wlan password with "="
* Upgrade digital CNN to v8.5.0 (added new images)
* New MQTT topics: flow rate (units/minute), time stamp (last correct read readout)
* Update MQTT/Error topic to " " in case no error (instead of empty string)
* Portrait or landscape image orientation in rotated image (avoid cropping)
##### 6.7.2 Image Processing in Memory - (2021-05-01)
* NEW 6.7.2: Updated html for setup modus - remove reboot on edit configuration)

View File

@@ -11,6 +11,18 @@
____
#### #11 MQTT - configurable payload
* https://github.com/jomjol/AI-on-the-edge-device/issues/344
#### #10 Improve and bug fix logging of images
* https://github.com/jomjol/AI-on-the-edge-device/issues/307
#### #9 Basic auth for the UI
* https://github.com/jomjol/AI-on-the-edge-device/issues/283

21
LICENSE
View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2020 jomjol
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -47,10 +47,58 @@ In other cases you can contact the developer via email: <img src="https://raw.gi
##### 8.0.2 - Multi Meter Support (2021-07-23
##### 8.4.0 - Multi Meter Support (2021-09-25)
* NEW 8.0.1: saving roundes prevalue, bug fix html server
* NEW 8.0.1: bug fix html handling of parameter `FixedExposure` and `ImageSize`
* License change (remove MIT license, remark see below)
* html: show hostname in title and main page
* configuration:
* moved setting `ExtendedResolution` to individual number settings
* New parameter `IgnoreLeadingNaN` (delete leading NaN's specifically)
* **ATTENTION**: update of the `config.ini` needed (open, adjust `ExtendedResolution`, save)
* Bug fixing (html, images of recognized numbers)
### **ATTENTION: LICENSE CHANGE - removal of MIT License.**
- Currently no licence published - copyright belongs to author
- If you are interested in a commercial usage or dedicated versions please contact the developer
- no limits to private usage
##### 8.3.0 - Multi Meter Support (2021-09-12)
* Upgrade digital CNN to v12.1.0 (added new images)
* Dedicated NaN handling, internal refactoring (CNN-Handling)
* HTML: confirmation after config.ini update
* Bug fixing
##### 8.2.0 - Multi Meter Support (2021-08-24)
* Improve server responsiveness
* Flow status and prevalue status in overview
* Improved prevalue handling
##### 8.1.0 - Multi Meter Support (2021-08-12)
* GPIO: using the general mqtt main topic for GPIO
* Upgrade digital CNN to v12.0.0 (added new images)
* Update tfmicro to new master (2021-08-07)
* Bug fix: remove text in mqtt value, remove connect limit in wlan reconnet
##### 8.0.5 - Multi Meter Support (2021-08-01)
* NEW 8.0.5: bug fix: saving prevalue
* NEW 8.0.4: bug fix: load config.ini after upgrade
* NEW 8.0.3: bug fix: reboot during `config.ini` handling, html error
* NEW 8.0.2: saving roundes prevalue, bug fix html server
* NEW 8.0.1: bug fix: html handling of parameter `FixedExposure` and `ImageSize`
* Dual / multi meter support (more than 1 number to be recognized)
This is implemented with the feature "number" on the ROI definition as well as selected options
* MQTT: standardization of the naming - including new topics (`json`, `freeMem `, `uptime`)c
@@ -60,32 +108,6 @@ In other cases you can contact the developer via email: <img src="https://raw.gi
<span style="color: red;">**ATTENTION: the configuration and prevalue files are modified automatically and will not be backward compatible!**</span>
##### 7.1.2 MQTT-Update - (2021-06-17)
* NEW: 7.1.2: bug fix setting hostname, Flash-LED not off during rebootNEW: 7.1.1: bug fix wlan password with "=" (again)
* MQTT error message: changes "no error", send retain flag
* Update wlan handling to esp-idf 4.1
* Upgrade digital CNN to v8.7.0 (added new images)
* Bug fix: MQTT, WLAN, LED-Controll, GPIO usage, fixed IP, calculation flow rate
##### 7.0.1 MQTT-Update - (2021-05-13)
* NEW: 7.0.1: bug fix wlan password with "="
* Upgrade digital CNN to v8.5.0 (added new images)
* New MQTT topics: flow rate (units/minute), time stamp (last correct read readout)
* Update MQTT/Error topic to " " in case no error (instead of empty string)
* Portrait or landscape image orientation in rotated image (avoid cropping)
## Additional ideas
There are some ideas and feature request, which are not followed currently - mainly due to capacity reasons on side of the developer. They are collected here: [FeatureRequest.md](FeatureRequest.md)
@@ -96,6 +118,10 @@ There are some ideas and feature request, which are not followed currently - mai
## History
##### 7.1.2 MQTT-Update - (2021-06-17)
**7.0.1 MQTT-Update - (2021-05-13)**
##### 6.7.2 Image Processing in Memory - (2021-05-01)
##### 5.0.0 Setup Modus - (2020-12-06)
@@ -118,8 +144,3 @@ There are some ideas and feature request, which are not followed currently - mai
#### [Full Changelog](Changelog.md)
## Solved topics
* n.a.

63
code/SmartLeds.cpp Normal file
View File

@@ -0,0 +1,63 @@
#include "SmartLeds.h"
IsrCore SmartLed::_interruptCore = CoreCurrent;
intr_handle_t SmartLed::_interruptHandle = NULL;
SmartLed*& IRAM_ATTR SmartLed::ledForChannel( int channel ) {
static SmartLed* table[8] = { nullptr };
assert( channel < 8 );
return table[ channel ];
}
void IRAM_ATTR SmartLed::interruptHandler(void*) {
for (int channel = 0; channel != 8; channel++) {
auto self = ledForChannel( channel );
if ( RMT.int_st.val & (1 << (24 + channel ) ) ) { // tx_thr_event
if ( self )
self->copyRmtHalfBlock();
RMT.int_clr.val |= 1 << ( 24 + channel );
} else if ( RMT.int_st.val & ( 1 << (3 * channel ) ) ) { // tx_end
if ( self )
xSemaphoreGiveFromISR( self->_finishedFlag, nullptr );
RMT.int_clr.val |= 1 << ( 3 * channel );
}
}
}
void IRAM_ATTR SmartLed::copyRmtHalfBlock() {
int offset = detail::MAX_PULSES * _halfIdx;
_halfIdx = !_halfIdx;
int len = 3 - _componentPosition + 3 * ( _count - 1 );
len = std::min( len, detail::MAX_PULSES / 8 );
if ( !len ) {
for ( int i = 0; i < detail::MAX_PULSES; i++) {
RMTMEM.chan[ _channel].data32[i + offset ].val = 0;
}
}
int i;
for ( i = 0; i != len && _pixelPosition != _count; i++ ) {
uint8_t val = _buffer[ _pixelPosition ].getGrb( _componentPosition );
for ( int j = 0; j != 8; j++, val <<= 1 ) {
int bit = val >> 7;
int idx = i * 8 + offset + j;
RMTMEM.chan[ _channel ].data32[ idx ].val = _bitToRmt[ bit & 0x01 ].value;
}
if ( _pixelPosition == _count - 1 && _componentPosition == 2 ) {
RMTMEM.chan[ _channel ].data32[ i * 8 + offset + 7 ].duration1 =
_timing.TRS / ( detail::RMT_DURATION_NS * detail::DIVIDER );
}
_componentPosition++;
if ( _componentPosition == 3 ) {
_componentPosition = 0;
_pixelPosition++;
}
}
for ( i *= 8; i != detail::MAX_PULSES; i++ ) {
RMTMEM.chan[ _channel ].data32[ i + offset ].val = 0;
}
}

530
code/SmartLeds.h Normal file
View File

@@ -0,0 +1,530 @@
#pragma once
/*
* A C++ driver for the WS2812 LEDs using the RMT peripheral on the ESP32.
*
* Jan "yaqwsx" Mrázek <email@honzamrazek.cz>
*
* Based on the work by Martin F. Falatic - https://github.com/FozzTexx/ws2812-demo
*/
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <memory>
#include <cassert>
#include <cstring>
#if defined ( ARDUINO )
extern "C" { // ...someone forgot to put in the includes...
#include "esp32-hal.h"
#include "esp_intr_alloc.h"
#include "esp_ipc.h"
#include "driver/gpio.h"
#include "driver/periph_ctrl.h"
#include "freertos/semphr.h"
#include "soc/rmt_struct.h"
#include <driver/spi_master.h>
#include "esp_idf_version.h"
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL( 4, 0, 0 )
#include "soc/dport_reg.h"
#endif
}
#elif defined ( ESP_PLATFORM )
extern "C" { // ...someone forgot to put in the includes...
#include <esp_intr_alloc.h>
#include <esp_ipc.h>
#include <driver/gpio.h>
#include <freertos/FreeRTOS.h>
#include <freertos/semphr.h>
#include <soc/dport_reg.h>
#include <soc/gpio_sig_map.h>
#include <soc/rmt_struct.h>
#include <driver/spi_master.h>
}
#include <stdio.h>
#endif
#include "Color.h"
namespace detail {
struct TimingParams {
uint32_t T0H;
uint32_t T1H;
uint32_t T0L;
uint32_t T1L;
uint32_t TRS;
};
union RmtPulsePair {
struct {
int duration0:15;
int level0:1;
int duration1:15;
int level1:1;
};
uint32_t value;
};
static const int DIVIDER = 4; // 8 still seems to work, but timings become marginal
static const int MAX_PULSES = 32; // A channel has a 64 "pulse" buffer - we use half per pass
static const double RMT_DURATION_NS = 12.5; // minimum time of a single RMT duration based on clock ns
} // namespace detail
using LedType = detail::TimingParams;
static const LedType LED_WS2812 = { 350, 700, 800, 600, 50000 };
static const LedType LED_WS2812B = { 400, 850, 850, 400, 50100 };
static const LedType LED_SK6812 = { 300, 600, 900, 600, 80000 };
static const LedType LED_WS2813 = { 350, 800, 350, 350, 300000 };
enum BufferType { SingleBuffer = 0, DoubleBuffer };
enum IsrCore { CoreFirst = 0, CoreSecond = 1, CoreCurrent = 2};
class SmartLed {
public:
// The RMT interrupt must not run on the same core as WiFi interrupts, otherwise SmartLeds
// can't fill the RMT buffer fast enough, resulting in rendering artifacts.
// Usually, that means you have to set isrCore == CoreSecond.
//
// If you use anything other than CoreCurrent, the FreeRTOS scheduler MUST be already running,
// so you can't use it if you define SmartLed as global variable.
SmartLed( const LedType& type, int count, int pin, int channel = 0, BufferType doubleBuffer = SingleBuffer, IsrCore isrCore = CoreCurrent)
: _timing( type ),
_channel( channel ),
_count( count ),
_firstBuffer( new Rgb[ count ] ),
_secondBuffer( doubleBuffer ? new Rgb[ count ] : nullptr ),
_finishedFlag( xSemaphoreCreateBinary() )
{
assert( channel >= 0 && channel < 8 );
assert( ledForChannel( channel ) == nullptr );
xSemaphoreGive( _finishedFlag );
DPORT_SET_PERI_REG_MASK( DPORT_PERIP_CLK_EN_REG, DPORT_RMT_CLK_EN );
DPORT_CLEAR_PERI_REG_MASK( DPORT_PERIP_RST_EN_REG, DPORT_RMT_RST );
PIN_FUNC_SELECT( GPIO_PIN_MUX_REG[ pin ], 2 );
gpio_set_direction( static_cast< gpio_num_t >( pin ), GPIO_MODE_OUTPUT );
gpio_matrix_out( static_cast< gpio_num_t >( pin ), RMT_SIG_OUT0_IDX + _channel, 0, 0 );
initChannel( _channel );
RMT.tx_lim_ch[ _channel ].limit = detail::MAX_PULSES;
RMT.int_ena.val |= 1 << ( 24 + _channel );
RMT.int_ena.val |= 1 << ( 3 * _channel );
_bitToRmt[ 0 ].level0 = 1;
_bitToRmt[ 0 ].level1 = 0;
_bitToRmt[ 0 ].duration0 = _timing.T0H / ( detail::RMT_DURATION_NS * detail::DIVIDER );
_bitToRmt[ 0 ].duration1 = _timing.T0L / ( detail::RMT_DURATION_NS * detail::DIVIDER );
_bitToRmt[ 1 ].level0 = 1;
_bitToRmt[ 1 ].level1 = 0;
_bitToRmt[ 1 ].duration0 = _timing.T1H / ( detail::RMT_DURATION_NS * detail::DIVIDER );
_bitToRmt[ 1 ].duration1 = _timing.T1L / ( detail::RMT_DURATION_NS * detail::DIVIDER );
if ( !anyAlive() ) {
_interruptCore = isrCore;
if(isrCore != CoreCurrent) {
ESP_ERROR_CHECK(esp_ipc_call_blocking(isrCore, registerInterrupt, NULL));
} else {
registerInterrupt(NULL);
}
}
ledForChannel( channel ) = this;
}
~SmartLed() {
ledForChannel( _channel ) = nullptr;
if ( !anyAlive() ) {
if(_interruptCore != CoreCurrent) {
ESP_ERROR_CHECK(esp_ipc_call_blocking(_interruptCore, unregisterInterrupt, NULL));
} else {
unregisterInterrupt(NULL);
}
}
vSemaphoreDelete( _finishedFlag );
}
Rgb& operator[]( int idx ) {
return _firstBuffer[ idx ];
}
const Rgb& operator[]( int idx ) const {
return _firstBuffer[ idx ];
}
void show() {
_buffer = _firstBuffer.get();
startTransmission();
swapBuffers();
}
bool wait( TickType_t timeout = portMAX_DELAY ) {
if( xSemaphoreTake( _finishedFlag, timeout ) == pdTRUE ) {
xSemaphoreGive( _finishedFlag );
return true;
}
return false;
}
int size() const {
return _count;
}
Rgb *begin() { return _firstBuffer.get(); }
const Rgb *begin() const { return _firstBuffer.get(); }
const Rgb *cbegin() const { return _firstBuffer.get(); }
Rgb *end() { return _firstBuffer.get() + _count; }
const Rgb *end() const { return _firstBuffer.get() + _count; }
const Rgb *cend() const { return _firstBuffer.get() + _count; }
private:
static intr_handle_t _interruptHandle;
static IsrCore _interruptCore;
static void initChannel( int channel ) {
RMT.apb_conf.fifo_mask = 1; //enable memory access, instead of FIFO mode.
RMT.apb_conf.mem_tx_wrap_en = 1; //wrap around when hitting end of buffer
RMT.conf_ch[ channel ].conf0.div_cnt = detail::DIVIDER;
RMT.conf_ch[ channel ].conf0.mem_size = 1;
RMT.conf_ch[ channel ].conf0.carrier_en = 0;
RMT.conf_ch[ channel ].conf0.carrier_out_lv = 1;
RMT.conf_ch[ channel ].conf0.mem_pd = 0;
RMT.conf_ch[ channel ].conf1.rx_en = 0;
RMT.conf_ch[ channel ].conf1.mem_owner = 0;
RMT.conf_ch[ channel ].conf1.tx_conti_mode = 0; //loop back mode.
RMT.conf_ch[ channel ].conf1.ref_always_on = 1; // use apb clock: 80M
RMT.conf_ch[ channel ].conf1.idle_out_en = 1;
RMT.conf_ch[ channel ].conf1.idle_out_lv = 0;
}
static void registerInterrupt(void *) {
ESP_ERROR_CHECK(esp_intr_alloc( ETS_RMT_INTR_SOURCE, 0, interruptHandler, nullptr, &_interruptHandle));
}
static void unregisterInterrupt(void*) {
esp_intr_free( _interruptHandle );
}
static SmartLed*& IRAM_ATTR ledForChannel( int channel );
static void IRAM_ATTR interruptHandler( void* );
void IRAM_ATTR copyRmtHalfBlock();
void swapBuffers() {
if ( _secondBuffer )
_firstBuffer.swap( _secondBuffer );
}
void startTransmission() {
// Invalid use of the library
if( xSemaphoreTake( _finishedFlag, 0 ) != pdTRUE )
abort();
_pixelPosition = _componentPosition = _halfIdx = 0;
copyRmtHalfBlock();
if ( _pixelPosition < _count )
copyRmtHalfBlock();
RMT.conf_ch[ _channel ].conf1.mem_rd_rst = 1;
RMT.conf_ch[ _channel ].conf1.tx_start = 1;
}
static bool anyAlive() {
for ( int i = 0; i != 8; i++ )
if ( ledForChannel( i ) != nullptr ) return true;
return false;
}
const LedType& _timing;
int _channel;
detail::RmtPulsePair _bitToRmt[ 2 ];
int _count;
std::unique_ptr< Rgb[] > _firstBuffer;
std::unique_ptr< Rgb[] > _secondBuffer;
Rgb *_buffer;
xSemaphoreHandle _finishedFlag;
int _pixelPosition;
int _componentPosition;
int _halfIdx;
};
class Apa102 {
public:
struct ApaRgb {
ApaRgb( uint8_t r = 0, uint8_t g = 0, uint32_t b = 0, uint32_t v = 0xFF )
: v( 0xE0 | v ), b( b ), g( g ), r( r )
{}
ApaRgb& operator=( const Rgb& o ) {
r = o.r;
g = o.g;
b = o.b;
return *this;
}
ApaRgb& operator=( const Hsv& o ) {
*this = Rgb{ o };
return *this;
}
uint8_t v, b, g, r;
};
static const int FINAL_FRAME_SIZE = 4;
static const int TRANS_COUNT = 2 + 8;
Apa102( int count, int clkpin, int datapin, BufferType doubleBuffer = SingleBuffer )
: _count( count ),
_firstBuffer( new ApaRgb[ count ] ),
_secondBuffer( doubleBuffer ? new ApaRgb[ count ] : nullptr ),
_initFrame( 0 )
{
spi_bus_config_t buscfg;
memset( &buscfg, 0, sizeof( buscfg ) );
buscfg.mosi_io_num = datapin;
buscfg.miso_io_num = -1;
buscfg.sclk_io_num = clkpin;
buscfg.quadwp_io_num = -1;
buscfg.quadhd_io_num = -1;
buscfg.max_transfer_sz = 65535;
spi_device_interface_config_t devcfg;
memset( &devcfg, 0, sizeof( devcfg ) );
devcfg.clock_speed_hz = 1000000;
devcfg.mode = 0;
devcfg.spics_io_num = -1;
devcfg.queue_size = TRANS_COUNT;
devcfg.pre_cb = nullptr;
auto ret = spi_bus_initialize( HSPI_HOST, &buscfg, 1 );
assert( ret == ESP_OK );
ret = spi_bus_add_device( HSPI_HOST, &devcfg, &_spi );
assert( ret == ESP_OK );
std::fill_n( _finalFrame, FINAL_FRAME_SIZE, 0xFFFFFFFF );
}
~Apa102() {
// ToDo
}
ApaRgb& operator[]( int idx ) {
return _firstBuffer[ idx ];
}
const ApaRgb& operator[]( int idx ) const {
return _firstBuffer[ idx ];
}
void show() {
_buffer = _firstBuffer.get();
startTransmission();
swapBuffers();
}
void wait() {
for ( int i = 0; i != _transCount; i++ ) {
spi_transaction_t *t;
spi_device_get_trans_result( _spi, &t, portMAX_DELAY );
}
}
private:
void swapBuffers() {
if ( _secondBuffer )
_firstBuffer.swap( _secondBuffer );
}
void startTransmission() {
for ( int i = 0; i != TRANS_COUNT; i++ ) {
_transactions[ i ].cmd = 0;
_transactions[ i ].addr = 0;
_transactions[ i ].flags = 0;
_transactions[ i ].rxlength = 0;
_transactions[ i ].rx_buffer = nullptr;
}
// Init frame
_transactions[ 0 ].length = 32;
_transactions[ 0 ].tx_buffer = &_initFrame;
spi_device_queue_trans( _spi, _transactions + 0, portMAX_DELAY );
// Data
_transactions[ 1 ].length = 32 * _count;
_transactions[ 1 ].tx_buffer = _buffer;
spi_device_queue_trans( _spi, _transactions + 1, portMAX_DELAY );
_transCount = 2;
// End frame
for ( int i = 0; i != 1 + _count / 32 / FINAL_FRAME_SIZE; i++ ) {
_transactions[ 2 + i ].length = 32 * FINAL_FRAME_SIZE;
_transactions[ 2 + i ].tx_buffer = _finalFrame;
spi_device_queue_trans( _spi, _transactions + 2 + i, portMAX_DELAY );
_transCount++;
}
}
spi_device_handle_t _spi;
int _count;
std::unique_ptr< ApaRgb[] > _firstBuffer, _secondBuffer;
ApaRgb *_buffer;
spi_transaction_t _transactions[ TRANS_COUNT ];
int _transCount;
uint32_t _initFrame;
uint32_t _finalFrame[ FINAL_FRAME_SIZE ];
};
class LDP8806 {
public:
struct LDP8806_GRB {
LDP8806_GRB( uint8_t g_7bit = 0, uint8_t r_7bit = 0, uint32_t b_7bit = 0 )
: g( g_7bit ), r( r_7bit ), b( b_7bit )
{
}
LDP8806_GRB& operator=( const Rgb& o ) {
//Convert 8->7bit colour
r = ( o.r * 127 / 256 ) | 0x80;
g = ( o.g * 127 / 256 ) | 0x80;
b = ( o.b * 127 / 256 ) | 0x80;
return *this;
}
LDP8806_GRB& operator=( const Hsv& o ) {
*this = Rgb{ o };
return *this;
}
uint8_t g, r, b;
};
static const int LED_FRAME_SIZE_BYTES = sizeof( LDP8806_GRB );
static const int LATCH_FRAME_SIZE_BYTES = 3;
static const int TRANS_COUNT_MAX = 20;//Arbitrary, supports up to 600 LED
LDP8806( int count, int clkpin, int datapin, BufferType doubleBuffer = SingleBuffer, uint32_t clock_speed_hz = 2000000 )
: _count( count ),
_firstBuffer( new LDP8806_GRB[ count ] ),
_secondBuffer( doubleBuffer ? new LDP8806_GRB[ count ] : nullptr ),
// one 'latch'/start-of-data mark frame for every 32 leds
_latchFrames( ( count + 31 ) / 32 )
{
spi_bus_config_t buscfg;
memset( &buscfg, 0, sizeof( buscfg ) );
buscfg.mosi_io_num = datapin;
buscfg.miso_io_num = -1;
buscfg.sclk_io_num = clkpin;
buscfg.quadwp_io_num = -1;
buscfg.quadhd_io_num = -1;
buscfg.max_transfer_sz = 65535;
spi_device_interface_config_t devcfg;
memset( &devcfg, 0, sizeof( devcfg ) );
devcfg.clock_speed_hz = clock_speed_hz;
devcfg.mode = 0;
devcfg.spics_io_num = -1;
devcfg.queue_size = TRANS_COUNT_MAX;
devcfg.pre_cb = nullptr;
auto ret = spi_bus_initialize( HSPI_HOST, &buscfg, 1 );
assert( ret == ESP_OK );
ret = spi_bus_add_device( HSPI_HOST, &devcfg, &_spi );
assert( ret == ESP_OK );
std::fill_n( _latchBuffer, LATCH_FRAME_SIZE_BYTES, 0x0 );
}
~LDP8806() {
// noop
}
LDP8806_GRB& operator[]( int idx ) {
return _firstBuffer[ idx ];
}
const LDP8806_GRB& operator[]( int idx ) const {
return _firstBuffer[ idx ];
}
void show() {
_buffer = _firstBuffer.get();
startTransmission();
swapBuffers();
}
void wait() {
while ( _transCount-- ) {
spi_transaction_t *t;
spi_device_get_trans_result( _spi, &t, portMAX_DELAY );
}
}
private:
void swapBuffers() {
if ( _secondBuffer )
_firstBuffer.swap( _secondBuffer );
}
void startTransmission() {
_transCount = 0;
for ( int i = 0; i != TRANS_COUNT_MAX; i++ ) {
_transactions[ i ].cmd = 0;
_transactions[ i ].addr = 0;
_transactions[ i ].flags = 0;
_transactions[ i ].rxlength = 0;
_transactions[ i ].rx_buffer = nullptr;
}
// LED Data
_transactions[ 0 ].length = ( LED_FRAME_SIZE_BYTES * 8 ) * _count;
_transactions[ 0 ].tx_buffer = _buffer;
spi_device_queue_trans( _spi, _transactions + _transCount, portMAX_DELAY );
_transCount++;
// 'latch'/start-of-data marker frames
for ( int i = 0; i < _latchFrames; i++ ) {
_transactions[ _transCount ].length = ( LATCH_FRAME_SIZE_BYTES * 8 );
_transactions[ _transCount ].tx_buffer = _latchBuffer;
spi_device_queue_trans( _spi, _transactions + _transCount, portMAX_DELAY );
_transCount++;
}
}
spi_device_handle_t _spi;
int _count;
std::unique_ptr< LDP8806_GRB[] > _firstBuffer, _secondBuffer;
LDP8806_GRB *_buffer;
spi_transaction_t _transactions[ TRANS_COUNT_MAX ];
int _transCount;
int _latchFrames;
uint8_t _latchBuffer[ LATCH_FRAME_SIZE_BYTES ];
};

View File

@@ -39,18 +39,27 @@ bool ConfigFile::GetNextParagraph(std::string& aktparamgraph, bool &disabled, bo
bool ConfigFile::getNextLine(std::string *rt, bool &disabled, bool &eof)
{
eof = false;
char zw[1024];
char zw[1024] = "";
if (pFile == NULL)
{
*rt = "";
return false;
}
fgets(zw, 1024, pFile);
printf("%s", zw);
if ((strlen(zw) == 0) && feof(pFile))
if (fgets(zw, 1024, pFile))
{
printf("%s", zw);
if ((strlen(zw) == 0) && feof(pFile))
{
*rt = "";
eof = true;
return false;
}
}
else
{
*rt = "";
eof = true;
eof = true;
return false;
}
*rt = zw;

View File

@@ -0,0 +1,132 @@
#include "Color.h"
#include <algorithm>
#include <cmath>
#include <cassert>
namespace {
// Int -> fixed point
int up( int x ) { return x * 255; }
} // namespace
int iRgbSqrt( int num ) {
// https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_.28base_2.29
assert( "sqrt input should be non-negative" && num >= 0 );
assert( "sqrt input should no exceed 16 bits" && num <= 0xFFFF );
int res = 0;
int bit = 1 << 16;
while ( bit > num )
bit >>= 2;
while ( bit != 0 ) {
if ( num >= res + bit ) {
num -= res + bit;
res = ( res >> 1 ) + bit;
} else
res >>= 1;
bit >>= 2;
}
return res;
}
Rgb::Rgb( Hsv y ) {
// https://stackoverflow.com/questions/24152553/hsv-to-rgb-and-back-without-floating-point-math-in-python
// greyscale
if( y.s == 0 ) {
r = g = b = y.v;
return;
}
const int region = y.h / 43;
const int remainder = ( y.h - ( region * 43 ) ) * 6;
const int p = ( y.v * ( 255 - y.s ) ) >> 8;
const int q = ( y.v * ( 255 - ( ( y.s * remainder ) >> 8 ) ) ) >> 8;
const int t = ( y.v * ( 255 - ( ( y.s * (255 -remainder ) ) >> 8 ) ) ) >> 8;
switch( region ) {
case 0: r = y.v; g = t; b = p; break;
case 1: r = q; g = y.v; b = p; break;
case 2: r = p; g = y.v; b = t; break;
case 3: r = p; g = q; b = y.v; break;
case 4: r = t; g = p; b = y.v; break;
case 5: r = y.v; g = p; b = q; break;
default: __builtin_trap();
}
a = y.a;
}
Rgb& Rgb::operator=( Hsv hsv ) {
Rgb r{ hsv };
swap( r );
return *this;
}
Rgb Rgb::operator+( Rgb in ) const {
auto copy = *this;
copy += in;
return copy;
}
Rgb& Rgb::operator+=( Rgb in ) {
unsigned int red = r + in.r;
r = ( red < 255 ) ? red : 255;
unsigned int green = g + in.g;
g = ( green < 255 ) ? green : 255;
unsigned int blue = b + in.b;
b = ( blue < 255 ) ? blue : 255;
return *this;
}
Rgb& Rgb::blend( Rgb in ) {
unsigned int inAlpha = in.a * ( 255 - a );
unsigned int alpha = a + inAlpha;
r = iRgbSqrt( ( ( r * r * a ) + ( in.r * in.r * inAlpha ) ) / alpha );
g = iRgbSqrt( ( ( g * g * a ) + ( in.g * in.g * inAlpha ) ) / alpha );
b = iRgbSqrt( ( ( b * b * a ) + ( in.b * in.b * inAlpha ) ) / alpha );
a = alpha;
return *this;
}
uint8_t IRAM_ATTR Rgb::getGrb( int idx ) {
switch ( idx ) {
case 0: return g;
case 1: return r;
case 2: return b;
}
__builtin_unreachable();
}
Hsv::Hsv( Rgb r ) {
int min = std::min( r.r, std::min( r.g, r.b ) );
int max = std::max( r.r, std::max( r.g, r.b ) );
int chroma = max - min;
v = max;
if ( chroma == 0 ) {
h = s = 0;
return;
}
s = up( chroma ) / max;
int hh;
if ( max == r.r )
hh = ( up( int( r.g ) - int( r.b ) ) ) / chroma / 6;
else if ( max == r.g )
hh = 255 / 3 + ( up( int( r.b ) - int( r.r ) ) ) / chroma / 6;
else
hh = 2 * 255 / 3 + ( up( int( r.r ) - int( r.g ) ) ) / chroma / 6;
if ( hh < 0 )
hh += 255;
h = hh;
a = r.a;
}
Hsv& Hsv::operator=( Rgb rgb ) {
Hsv h{ rgb };
swap( h );
return *this;
}

View File

@@ -0,0 +1,69 @@
#pragma once
#include <cstdint>
#include "esp_attr.h"
union Hsv;
union Rgb {
struct __attribute__ ((packed)) {
uint8_t r, g, b, a;
};
uint32_t value;
Rgb( uint8_t r = 0, uint8_t g = 0, uint8_t b = 0, uint8_t a = 255 ) : r( r ), g( g ), b( b ), a( a ) {}
Rgb( Hsv c );
Rgb& operator=( Rgb rgb ) { swap( rgb ); return *this; }
Rgb& operator=( Hsv hsv );
Rgb operator+( Rgb in ) const;
Rgb& operator+=( Rgb in );
bool operator==( Rgb in ) const { return in.value == value; }
Rgb& blend( Rgb in );
void swap( Rgb& o ) { value = o.value; }
void linearize() {
r = channelGamma(r);
g = channelGamma(g);
b = channelGamma(b);
}
uint8_t IRAM_ATTR getGrb( int idx );
void stretchChannels( uint8_t maxR, uint8_t maxG, uint8_t maxB ) {
r = stretch( r, maxR );
g = stretch( g, maxG );
b = stretch( b, maxB );
}
void stretchChannelsEvenly( uint8_t max ) {
stretchChannels( max, max, max );
}
private:
uint8_t stretch( int value, uint8_t max ) {
return ( value * max ) >> 8;
}
uint8_t channelGamma( int channel ) {
/* The optimal gamma correction is x^2.8. However, this is expensive to
* compute. Therefore, we use x^3 for gamma correction. Also, we add a
* bias as the WS2812 LEDs do not turn on for values less than 4. */
if (channel == 0)
return channel;
channel = channel * channel * channel * 251;
channel >>= 24;
return static_cast< uint8_t >( 4 + channel );
}
};
union Hsv {
struct __attribute__ ((packed)) {
uint8_t h, s, v, a;
};
uint32_t value;
Hsv( uint8_t h, uint8_t s = 0, uint8_t v = 0, uint8_t a = 255 ) : h( h ), s( s ), v( v ), a( a ) {}
Hsv( Rgb r );
Hsv& operator=( Hsv h ) { swap( h ); return *this; }
Hsv& operator=( Rgb rgb );
bool operator==( Hsv in ) const { return in.value == value; }
void swap( Hsv& o ) { value = o.value; }
};

View File

@@ -0,0 +1,63 @@
#include "SmartLeds.h"
IsrCore SmartLed::_interruptCore = CoreCurrent;
intr_handle_t SmartLed::_interruptHandle = NULL;
SmartLed*& IRAM_ATTR SmartLed::ledForChannel( int channel ) {
static SmartLed* table[8] = { nullptr };
assert( channel < 8 );
return table[ channel ];
}
void IRAM_ATTR SmartLed::interruptHandler(void*) {
for (int channel = 0; channel != 8; channel++) {
auto self = ledForChannel( channel );
if ( RMT.int_st.val & (1 << (24 + channel ) ) ) { // tx_thr_event
if ( self )
self->copyRmtHalfBlock();
RMT.int_clr.val |= 1 << ( 24 + channel );
} else if ( RMT.int_st.val & ( 1 << (3 * channel ) ) ) { // tx_end
if ( self )
xSemaphoreGiveFromISR( self->_finishedFlag, nullptr );
RMT.int_clr.val |= 1 << ( 3 * channel );
}
}
}
void IRAM_ATTR SmartLed::copyRmtHalfBlock() {
int offset = detail::MAX_PULSES * _halfIdx;
_halfIdx = !_halfIdx;
int len = 3 - _componentPosition + 3 * ( _count - 1 );
len = std::min( len, detail::MAX_PULSES / 8 );
if ( !len ) {
for ( int i = 0; i < detail::MAX_PULSES; i++) {
RMTMEM.chan[ _channel].data32[i + offset ].val = 0;
}
}
int i;
for ( i = 0; i != len && _pixelPosition != _count; i++ ) {
uint8_t val = _buffer[ _pixelPosition ].getGrb( _componentPosition );
for ( int j = 0; j != 8; j++, val <<= 1 ) {
int bit = val >> 7;
int idx = i * 8 + offset + j;
RMTMEM.chan[ _channel ].data32[ idx ].val = _bitToRmt[ bit & 0x01 ].value;
}
if ( _pixelPosition == _count - 1 && _componentPosition == 2 ) {
RMTMEM.chan[ _channel ].data32[ i * 8 + offset + 7 ].duration1 =
_timing.TRS / ( detail::RMT_DURATION_NS * detail::DIVIDER );
}
_componentPosition++;
if ( _componentPosition == 3 ) {
_componentPosition = 0;
_pixelPosition++;
}
}
for ( i *= 8; i != detail::MAX_PULSES; i++ ) {
RMTMEM.chan[ _channel ].data32[ i + offset ].val = 0;
}
}

View File

@@ -0,0 +1,530 @@
#pragma once
/*
* A C++ driver for the WS2812 LEDs using the RMT peripheral on the ESP32.
*
* Jan "yaqwsx" Mrázek <email@honzamrazek.cz>
*
* Based on the work by Martin F. Falatic - https://github.com/FozzTexx/ws2812-demo
*/
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <memory>
#include <cassert>
#include <cstring>
#if defined ( ARDUINO )
extern "C" { // ...someone forgot to put in the includes...
#include "esp32-hal.h"
#include "esp_intr_alloc.h"
#include "esp_ipc.h"
#include "driver/gpio.h"
#include "driver/periph_ctrl.h"
#include "freertos/semphr.h"
#include "soc/rmt_struct.h"
#include <driver/spi_master.h>
#include "esp_idf_version.h"
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL( 4, 0, 0 )
#include "soc/dport_reg.h"
#endif
}
#elif defined ( ESP_PLATFORM )
extern "C" { // ...someone forgot to put in the includes...
#include <esp_intr_alloc.h>
#include <esp_ipc.h>
#include <driver/gpio.h>
#include <freertos/FreeRTOS.h>
#include <freertos/semphr.h>
#include <soc/dport_reg.h>
#include <soc/gpio_sig_map.h>
#include <soc/rmt_struct.h>
#include <driver/spi_master.h>
}
#include <stdio.h>
#endif
#include "Color.h"
namespace detail {
struct TimingParams {
uint32_t T0H;
uint32_t T1H;
uint32_t T0L;
uint32_t T1L;
uint32_t TRS;
};
union RmtPulsePair {
struct {
int duration0:15;
int level0:1;
int duration1:15;
int level1:1;
};
uint32_t value;
};
static const int DIVIDER = 4; // 8 still seems to work, but timings become marginal
static const int MAX_PULSES = 32; // A channel has a 64 "pulse" buffer - we use half per pass
static const double RMT_DURATION_NS = 12.5; // minimum time of a single RMT duration based on clock ns
} // namespace detail
using LedType = detail::TimingParams;
static const LedType LED_WS2812 = { 350, 700, 800, 600, 50000 };
static const LedType LED_WS2812B = { 400, 850, 850, 400, 50100 };
static const LedType LED_SK6812 = { 300, 600, 900, 600, 80000 };
static const LedType LED_WS2813 = { 350, 800, 350, 350, 300000 };
enum BufferType { SingleBuffer = 0, DoubleBuffer };
enum IsrCore { CoreFirst = 0, CoreSecond = 1, CoreCurrent = 2};
class SmartLed {
public:
// The RMT interrupt must not run on the same core as WiFi interrupts, otherwise SmartLeds
// can't fill the RMT buffer fast enough, resulting in rendering artifacts.
// Usually, that means you have to set isrCore == CoreSecond.
//
// If you use anything other than CoreCurrent, the FreeRTOS scheduler MUST be already running,
// so you can't use it if you define SmartLed as global variable.
SmartLed( const LedType& type, int count, int pin, int channel = 0, BufferType doubleBuffer = SingleBuffer, IsrCore isrCore = CoreCurrent)
: _timing( type ),
_channel( channel ),
_count( count ),
_firstBuffer( new Rgb[ count ] ),
_secondBuffer( doubleBuffer ? new Rgb[ count ] : nullptr ),
_finishedFlag( xSemaphoreCreateBinary() )
{
assert( channel >= 0 && channel < 8 );
assert( ledForChannel( channel ) == nullptr );
xSemaphoreGive( _finishedFlag );
DPORT_SET_PERI_REG_MASK( DPORT_PERIP_CLK_EN_REG, DPORT_RMT_CLK_EN );
DPORT_CLEAR_PERI_REG_MASK( DPORT_PERIP_RST_EN_REG, DPORT_RMT_RST );
PIN_FUNC_SELECT( GPIO_PIN_MUX_REG[ pin ], 2 );
gpio_set_direction( static_cast< gpio_num_t >( pin ), GPIO_MODE_OUTPUT );
gpio_matrix_out( static_cast< gpio_num_t >( pin ), RMT_SIG_OUT0_IDX + _channel, 0, 0 );
initChannel( _channel );
RMT.tx_lim_ch[ _channel ].limit = detail::MAX_PULSES;
RMT.int_ena.val |= 1 << ( 24 + _channel );
RMT.int_ena.val |= 1 << ( 3 * _channel );
_bitToRmt[ 0 ].level0 = 1;
_bitToRmt[ 0 ].level1 = 0;
_bitToRmt[ 0 ].duration0 = _timing.T0H / ( detail::RMT_DURATION_NS * detail::DIVIDER );
_bitToRmt[ 0 ].duration1 = _timing.T0L / ( detail::RMT_DURATION_NS * detail::DIVIDER );
_bitToRmt[ 1 ].level0 = 1;
_bitToRmt[ 1 ].level1 = 0;
_bitToRmt[ 1 ].duration0 = _timing.T1H / ( detail::RMT_DURATION_NS * detail::DIVIDER );
_bitToRmt[ 1 ].duration1 = _timing.T1L / ( detail::RMT_DURATION_NS * detail::DIVIDER );
if ( !anyAlive() ) {
_interruptCore = isrCore;
if(isrCore != CoreCurrent) {
ESP_ERROR_CHECK(esp_ipc_call_blocking(isrCore, registerInterrupt, NULL));
} else {
registerInterrupt(NULL);
}
}
ledForChannel( channel ) = this;
}
~SmartLed() {
ledForChannel( _channel ) = nullptr;
if ( !anyAlive() ) {
if(_interruptCore != CoreCurrent) {
ESP_ERROR_CHECK(esp_ipc_call_blocking(_interruptCore, unregisterInterrupt, NULL));
} else {
unregisterInterrupt(NULL);
}
}
vSemaphoreDelete( _finishedFlag );
}
Rgb& operator[]( int idx ) {
return _firstBuffer[ idx ];
}
const Rgb& operator[]( int idx ) const {
return _firstBuffer[ idx ];
}
void show() {
_buffer = _firstBuffer.get();
startTransmission();
swapBuffers();
}
bool wait( TickType_t timeout = portMAX_DELAY ) {
if( xSemaphoreTake( _finishedFlag, timeout ) == pdTRUE ) {
xSemaphoreGive( _finishedFlag );
return true;
}
return false;
}
int size() const {
return _count;
}
Rgb *begin() { return _firstBuffer.get(); }
const Rgb *begin() const { return _firstBuffer.get(); }
const Rgb *cbegin() const { return _firstBuffer.get(); }
Rgb *end() { return _firstBuffer.get() + _count; }
const Rgb *end() const { return _firstBuffer.get() + _count; }
const Rgb *cend() const { return _firstBuffer.get() + _count; }
private:
static intr_handle_t _interruptHandle;
static IsrCore _interruptCore;
static void initChannel( int channel ) {
RMT.apb_conf.fifo_mask = 1; //enable memory access, instead of FIFO mode.
RMT.apb_conf.mem_tx_wrap_en = 1; //wrap around when hitting end of buffer
RMT.conf_ch[ channel ].conf0.div_cnt = detail::DIVIDER;
RMT.conf_ch[ channel ].conf0.mem_size = 1;
RMT.conf_ch[ channel ].conf0.carrier_en = 0;
RMT.conf_ch[ channel ].conf0.carrier_out_lv = 1;
RMT.conf_ch[ channel ].conf0.mem_pd = 0;
RMT.conf_ch[ channel ].conf1.rx_en = 0;
RMT.conf_ch[ channel ].conf1.mem_owner = 0;
RMT.conf_ch[ channel ].conf1.tx_conti_mode = 0; //loop back mode.
RMT.conf_ch[ channel ].conf1.ref_always_on = 1; // use apb clock: 80M
RMT.conf_ch[ channel ].conf1.idle_out_en = 1;
RMT.conf_ch[ channel ].conf1.idle_out_lv = 0;
}
static void registerInterrupt(void *) {
ESP_ERROR_CHECK(esp_intr_alloc( ETS_RMT_INTR_SOURCE, 0, interruptHandler, nullptr, &_interruptHandle));
}
static void unregisterInterrupt(void*) {
esp_intr_free( _interruptHandle );
}
static SmartLed*& IRAM_ATTR ledForChannel( int channel );
static void IRAM_ATTR interruptHandler( void* );
void IRAM_ATTR copyRmtHalfBlock();
void swapBuffers() {
if ( _secondBuffer )
_firstBuffer.swap( _secondBuffer );
}
void startTransmission() {
// Invalid use of the library
if( xSemaphoreTake( _finishedFlag, 0 ) != pdTRUE )
abort();
_pixelPosition = _componentPosition = _halfIdx = 0;
copyRmtHalfBlock();
if ( _pixelPosition < _count )
copyRmtHalfBlock();
RMT.conf_ch[ _channel ].conf1.mem_rd_rst = 1;
RMT.conf_ch[ _channel ].conf1.tx_start = 1;
}
static bool anyAlive() {
for ( int i = 0; i != 8; i++ )
if ( ledForChannel( i ) != nullptr ) return true;
return false;
}
const LedType& _timing;
int _channel;
detail::RmtPulsePair _bitToRmt[ 2 ];
int _count;
std::unique_ptr< Rgb[] > _firstBuffer;
std::unique_ptr< Rgb[] > _secondBuffer;
Rgb *_buffer;
xSemaphoreHandle _finishedFlag;
int _pixelPosition;
int _componentPosition;
int _halfIdx;
};
class Apa102 {
public:
struct ApaRgb {
ApaRgb( uint8_t r = 0, uint8_t g = 0, uint32_t b = 0, uint32_t v = 0xFF )
: v( 0xE0 | v ), b( b ), g( g ), r( r )
{}
ApaRgb& operator=( const Rgb& o ) {
r = o.r;
g = o.g;
b = o.b;
return *this;
}
ApaRgb& operator=( const Hsv& o ) {
*this = Rgb{ o };
return *this;
}
uint8_t v, b, g, r;
};
static const int FINAL_FRAME_SIZE = 4;
static const int TRANS_COUNT = 2 + 8;
Apa102( int count, int clkpin, int datapin, BufferType doubleBuffer = SingleBuffer )
: _count( count ),
_firstBuffer( new ApaRgb[ count ] ),
_secondBuffer( doubleBuffer ? new ApaRgb[ count ] : nullptr ),
_initFrame( 0 )
{
spi_bus_config_t buscfg;
memset( &buscfg, 0, sizeof( buscfg ) );
buscfg.mosi_io_num = datapin;
buscfg.miso_io_num = -1;
buscfg.sclk_io_num = clkpin;
buscfg.quadwp_io_num = -1;
buscfg.quadhd_io_num = -1;
buscfg.max_transfer_sz = 65535;
spi_device_interface_config_t devcfg;
memset( &devcfg, 0, sizeof( devcfg ) );
devcfg.clock_speed_hz = 1000000;
devcfg.mode = 0;
devcfg.spics_io_num = -1;
devcfg.queue_size = TRANS_COUNT;
devcfg.pre_cb = nullptr;
auto ret = spi_bus_initialize( HSPI_HOST, &buscfg, 1 );
assert( ret == ESP_OK );
ret = spi_bus_add_device( HSPI_HOST, &devcfg, &_spi );
assert( ret == ESP_OK );
std::fill_n( _finalFrame, FINAL_FRAME_SIZE, 0xFFFFFFFF );
}
~Apa102() {
// ToDo
}
ApaRgb& operator[]( int idx ) {
return _firstBuffer[ idx ];
}
const ApaRgb& operator[]( int idx ) const {
return _firstBuffer[ idx ];
}
void show() {
_buffer = _firstBuffer.get();
startTransmission();
swapBuffers();
}
void wait() {
for ( int i = 0; i != _transCount; i++ ) {
spi_transaction_t *t;
spi_device_get_trans_result( _spi, &t, portMAX_DELAY );
}
}
private:
void swapBuffers() {
if ( _secondBuffer )
_firstBuffer.swap( _secondBuffer );
}
void startTransmission() {
for ( int i = 0; i != TRANS_COUNT; i++ ) {
_transactions[ i ].cmd = 0;
_transactions[ i ].addr = 0;
_transactions[ i ].flags = 0;
_transactions[ i ].rxlength = 0;
_transactions[ i ].rx_buffer = nullptr;
}
// Init frame
_transactions[ 0 ].length = 32;
_transactions[ 0 ].tx_buffer = &_initFrame;
spi_device_queue_trans( _spi, _transactions + 0, portMAX_DELAY );
// Data
_transactions[ 1 ].length = 32 * _count;
_transactions[ 1 ].tx_buffer = _buffer;
spi_device_queue_trans( _spi, _transactions + 1, portMAX_DELAY );
_transCount = 2;
// End frame
for ( int i = 0; i != 1 + _count / 32 / FINAL_FRAME_SIZE; i++ ) {
_transactions[ 2 + i ].length = 32 * FINAL_FRAME_SIZE;
_transactions[ 2 + i ].tx_buffer = _finalFrame;
spi_device_queue_trans( _spi, _transactions + 2 + i, portMAX_DELAY );
_transCount++;
}
}
spi_device_handle_t _spi;
int _count;
std::unique_ptr< ApaRgb[] > _firstBuffer, _secondBuffer;
ApaRgb *_buffer;
spi_transaction_t _transactions[ TRANS_COUNT ];
int _transCount;
uint32_t _initFrame;
uint32_t _finalFrame[ FINAL_FRAME_SIZE ];
};
class LDP8806 {
public:
struct LDP8806_GRB {
LDP8806_GRB( uint8_t g_7bit = 0, uint8_t r_7bit = 0, uint32_t b_7bit = 0 )
: g( g_7bit ), r( r_7bit ), b( b_7bit )
{
}
LDP8806_GRB& operator=( const Rgb& o ) {
//Convert 8->7bit colour
r = ( o.r * 127 / 256 ) | 0x80;
g = ( o.g * 127 / 256 ) | 0x80;
b = ( o.b * 127 / 256 ) | 0x80;
return *this;
}
LDP8806_GRB& operator=( const Hsv& o ) {
*this = Rgb{ o };
return *this;
}
uint8_t g, r, b;
};
static const int LED_FRAME_SIZE_BYTES = sizeof( LDP8806_GRB );
static const int LATCH_FRAME_SIZE_BYTES = 3;
static const int TRANS_COUNT_MAX = 20;//Arbitrary, supports up to 600 LED
LDP8806( int count, int clkpin, int datapin, BufferType doubleBuffer = SingleBuffer, uint32_t clock_speed_hz = 2000000 )
: _count( count ),
_firstBuffer( new LDP8806_GRB[ count ] ),
_secondBuffer( doubleBuffer ? new LDP8806_GRB[ count ] : nullptr ),
// one 'latch'/start-of-data mark frame for every 32 leds
_latchFrames( ( count + 31 ) / 32 )
{
spi_bus_config_t buscfg;
memset( &buscfg, 0, sizeof( buscfg ) );
buscfg.mosi_io_num = datapin;
buscfg.miso_io_num = -1;
buscfg.sclk_io_num = clkpin;
buscfg.quadwp_io_num = -1;
buscfg.quadhd_io_num = -1;
buscfg.max_transfer_sz = 65535;
spi_device_interface_config_t devcfg;
memset( &devcfg, 0, sizeof( devcfg ) );
devcfg.clock_speed_hz = clock_speed_hz;
devcfg.mode = 0;
devcfg.spics_io_num = -1;
devcfg.queue_size = TRANS_COUNT_MAX;
devcfg.pre_cb = nullptr;
auto ret = spi_bus_initialize( HSPI_HOST, &buscfg, 1 );
assert( ret == ESP_OK );
ret = spi_bus_add_device( HSPI_HOST, &devcfg, &_spi );
assert( ret == ESP_OK );
std::fill_n( _latchBuffer, LATCH_FRAME_SIZE_BYTES, 0x0 );
}
~LDP8806() {
// noop
}
LDP8806_GRB& operator[]( int idx ) {
return _firstBuffer[ idx ];
}
const LDP8806_GRB& operator[]( int idx ) const {
return _firstBuffer[ idx ];
}
void show() {
_buffer = _firstBuffer.get();
startTransmission();
swapBuffers();
}
void wait() {
while ( _transCount-- ) {
spi_transaction_t *t;
spi_device_get_trans_result( _spi, &t, portMAX_DELAY );
}
}
private:
void swapBuffers() {
if ( _secondBuffer )
_firstBuffer.swap( _secondBuffer );
}
void startTransmission() {
_transCount = 0;
for ( int i = 0; i != TRANS_COUNT_MAX; i++ ) {
_transactions[ i ].cmd = 0;
_transactions[ i ].addr = 0;
_transactions[ i ].flags = 0;
_transactions[ i ].rxlength = 0;
_transactions[ i ].rx_buffer = nullptr;
}
// LED Data
_transactions[ 0 ].length = ( LED_FRAME_SIZE_BYTES * 8 ) * _count;
_transactions[ 0 ].tx_buffer = _buffer;
spi_device_queue_trans( _spi, _transactions + _transCount, portMAX_DELAY );
_transCount++;
// 'latch'/start-of-data marker frames
for ( int i = 0; i < _latchFrames; i++ ) {
_transactions[ _transCount ].length = ( LATCH_FRAME_SIZE_BYTES * 8 );
_transactions[ _transCount ].tx_buffer = _latchBuffer;
spi_device_queue_trans( _spi, _transactions + _transCount, portMAX_DELAY );
_transCount++;
}
}
spi_device_handle_t _spi;
int _count;
std::unique_ptr< LDP8806_GRB[] > _firstBuffer, _secondBuffer;
LDP8806_GRB *_buffer;
spi_transaction_t _transactions[ TRANS_COUNT_MAX ];
int _transCount;
int _latchFrames;
uint8_t _latchBuffer[ LATCH_FRAME_SIZE_BYTES ];
};

View File

@@ -8,6 +8,8 @@
#include "esp_system.h"
#include "esp_event.h"
#include "server_tflite.h"
//#define LOG_LOCAL_LEVEL ESP_LOG_DEBUG
#include "esp_log.h"
//#include "errno.h"
@@ -105,7 +107,8 @@ void GpioPin::init()
//configure GPIO with the given settings
gpio_config(&io_conf);
if (_interruptType != GPIO_INTR_DISABLE) {
// if (_interruptType != GPIO_INTR_DISABLE) { // ohne GPIO_PIN_MODE_EXTERNAL_FLASH_WS281X, wenn das genutzt wird, dann soll auch der Handler hier nicht initialisiert werden, da das dann über SmartLED erfolgt.
if ((_interruptType != GPIO_INTR_DISABLE) && (_interruptType != GPIO_PIN_MODE_EXTERNAL_FLASH_WS281X)) {
//hook isr handler for specific gpio pin
ESP_LOGD(TAG_SERVERGPIO, "GpioPin::init add isr handler for GPIO %d\r\n", _gpio);
gpio_isr_handler_add(_gpio, gpio_isr_handler, (void*)&_gpio);
@@ -210,12 +213,15 @@ void GpioHandler::init()
// printf("wait before start %ldms\r\n", (long) xDelay);
// vTaskDelay( xDelay );
printf("*************** Start GPIOHandler_Init *****************\n");
if (gpioMap == NULL) {
gpioMap = new std::map<gpio_num_t, GpioPin*>();
} else {
clear();
}
ESP_LOGI(TAG_SERVERGPIO, "read GPIO config and init GPIO");
if (!readConfig()) {
clear();
@@ -225,6 +231,7 @@ void GpioHandler::init()
return;
}
for(std::map<gpio_num_t, GpioPin*>::iterator it = gpioMap->begin(); it != gpioMap->end(); ++it) {
it->second->init();
}
@@ -291,17 +298,32 @@ bool GpioHandler::readConfig()
std::string line = "";
bool disabledLine = false;
bool eof = false;
gpio_num_t gpioExtLED = (gpio_num_t) 0;
// printf("readConfig - Start 1\n");
while ((!configFile.GetNextParagraph(line, disabledLine, eof) || (line.compare("[GPIO]") != 0)) && !disabledLine && !eof) {}
while ((!configFile.GetNextParagraph(line, disabledLine, eof) || (line.compare("[GPIO]") != 0)) && !eof) {}
if (eof)
return false;
// printf("readConfig - Start 2 line: %s, disabbledLine: %d\n", line.c_str(), (int) disabledLine);
_isEnabled = !disabledLine;
if (!_isEnabled)
return false;
std::string mainTopicMQTT = "";
// printf("readConfig - Start 3\n");
// std::string mainTopicMQTT = "";
std::string mainTopicMQTT = GetMQTTMainTopic();
if (mainTopicMQTT.length() > 0)
{
mainTopicMQTT = mainTopicMQTT + "/GPIO";
ESP_LOGD(TAG_SERVERGPIO, "MAINTOPICMQTT found\r\n");
}
bool registerISR = false;
while (configFile.getNextLine(&line, disabledLine, eof) && !configFile.isNewParagraph(line))
{
@@ -313,8 +335,8 @@ bool GpioHandler::readConfig()
// std::string gpioStr = pieces_match[1];
ESP_LOGD(TAG_SERVERGPIO, "conf param %s\r\n", toUpper(zerlegt[0]).c_str());
if (toUpper(zerlegt[0]) == "MAINTOPICMQTT") {
ESP_LOGD(TAG_SERVERGPIO, "MAINTOPICMQTT found\r\n");
mainTopicMQTT = zerlegt[1];
// ESP_LOGD(TAG_SERVERGPIO, "MAINTOPICMQTT found\r\n");
// mainTopicMQTT = zerlegt[1];
} else if ((zerlegt[0].rfind("IO", 0) == 0) && (zerlegt.size() >= 6))
{
ESP_LOGI(TAG_SERVERGPIO,"Enable GP%s in %s mode", zerlegt[0].c_str(), zerlegt[1].c_str());
@@ -335,10 +357,40 @@ bool GpioHandler::readConfig()
GpioPin* gpioPin = new GpioPin(gpioNr, gpioName, pinMode, intType,dutyResolution, mqttTopic, httpEnabled);
(*gpioMap)[gpioNr] = gpioPin;
if (pinMode == GPIO_PIN_MODE_EXTERNAL_FLASH_WS281X)
{
printf("Set WS2812 to GPIO %d\n", gpioNr);
gpioExtLED = gpioNr;
}
if (intType != GPIO_INTR_DISABLE) {
registerISR = true;
}
}
if (toUpper(zerlegt[0]) == "LEDNUMBERS")
{
LEDNumbers = stoi(zerlegt[1]);
}
if (toUpper(zerlegt[0]) == "LEDCOLOR")
{
uint8_t _r, _g, _b;
_r = stoi(zerlegt[1]);
_g = stoi(zerlegt[2]);
_b = stoi(zerlegt[3]);
LEDColor = Rgb{_r, _g, _b};
}
if (toUpper(zerlegt[0]) == "LEDTYPE")
{
if (zerlegt[1] == "WS2812")
LEDType = LED_WS2812;
if (zerlegt[1] == "WS2812B")
LEDType = LED_WS2812B;
if (zerlegt[1] == "SK6812")
LEDType = LED_SK6812;
if (zerlegt[1] == "WS2813")
LEDType = LED_WS2813;
}
}
if (registerISR) {
@@ -346,6 +398,28 @@ bool GpioHandler::readConfig()
gpio_install_isr_service(ESP_INTR_FLAG_LOWMED | ESP_INTR_FLAG_IRAM);
}
if (gpioExtLED > 0)
{
// LogFile.WriteToFile("Startsequence 06");
// vTaskDelay( xDelay );
// xDelay = 5000 / portTICK_PERIOD_MS;
// printf("main: sleep for : %ldms\n", (long) xDelay);
SmartLed leds( LED_WS2812, 2, GPIO_NUM_12, 0, DoubleBuffer );
leds[ 0 ] = Rgb{ 255, 0, 0 };
leds[ 1 ] = Rgb{ 255, 255, 255 };
leds.show();
/*
// _SmartLED = new SmartLed(LEDType, LEDNumbers, gpioExtLED, 0, DoubleBuffer);
_SmartLED = new SmartLed( LED_WS2812, 2, GPIO_NUM_12, 0, DoubleBuffer );
(*_SmartLED)[ 0 ] = Rgb{ 255, 0, 0 };
(*_SmartLED)[ 1 ] = LEDColor;
_SmartLED->show();
*/
}
return true;
}
@@ -487,7 +561,24 @@ void GpioHandler::flashLightEnable(bool value)
} else {
ESP_LOGE(TAG_SERVERGPIO, "Can't set flash light pin GPIO %d. Error: %s\r\n", (int)it->first, resp_str.c_str());
}
}
} else
{
if (it->second->getMode() == GPIO_PIN_MODE_EXTERNAL_FLASH_WS281X)
{
SmartLed leds( LEDType, LEDNumbers, it->second->getGPIO(), 0, DoubleBuffer );
if (value)
{
for (int i = 0; i < LEDNumbers; ++i)
leds[i] = LEDColor;
}
else
{
for (int i = 0; i < LEDNumbers; ++i)
leds[i] = Rgb{0, 0, 0};
}
leds.show();
}
}
}
}
}

View File

@@ -7,6 +7,8 @@
#include <map>
#include "driver/gpio.h"
#include "SmartLeds.h"
//#include "ClassControllCamera.h"
typedef enum {
@@ -45,6 +47,7 @@ public:
void gpioInterrupt(int value);
gpio_int_type_t getInterruptType() { return _interruptType; }
gpio_pin_mode_t getMode() { return _mode; }
gpio_num_t getGPIO(){return _gpio;};
private:
gpio_num_t _gpio;
@@ -80,6 +83,11 @@ private:
TaskHandle_t xHandleTaskGpio = NULL;
bool _isEnabled = false;
int LEDNumbers = 2;
Rgb LEDColor = Rgb{ 255, 255, 255 };
LedType LEDType = LED_WS2812;
bool readConfig();
void clear();

View File

@@ -499,6 +499,7 @@ void CCamera::LightOnOff(bool status)
{
GpioHandler* gpioHandler = gpio_handler_get();
if ((gpioHandler != NULL) && (gpioHandler->isEnabled())) {
printf("Use gpioHandler flashLigh\n");
gpioHandler->flashLightEnable(status);
} else {
// Init the GPIO

View File

@@ -486,10 +486,10 @@ static esp_err_t upload_post_handler(httpd_req_t *req)
int start_fn = strlen(((struct file_server_data *)req->user_ctx)->base_path);
printf("Directory: %s, start_fn: %d, found: %d\n", directory.c_str(), start_fn, found);
directory = directory.substr(start_fn, found - start_fn + 1);
printf("Directory danach: %s\n", directory.c_str());
printf("Directory danach 1: %s\n", directory.c_str());
directory = "/fileserver" + directory;
printf("Directory danach: %s\n", directory.c_str());
printf("Directory danach 2: %s\n", directory.c_str());
/* Redirect onto root to see the updated file list */
httpd_resp_set_status(req, "303 See Other");
@@ -500,11 +500,13 @@ static esp_err_t upload_post_handler(httpd_req_t *req)
httpd_resp_set_hdr(req, "Location", directory.c_str());
httpd_resp_sendstr(req, "File uploaded successfully");
/*
if (strcmp(filepath, CONFIG_FILE) == 0) {
printf("New config foung. Reload handler.");
printf("New config found. Reload handler.");
gpio_handler_deinit();
MQTTdestroy();
}
*/
return ESP_OK;
}
@@ -606,10 +608,10 @@ static esp_err_t delete_post_handler(httpd_req_t *req)
int start_fn = strlen(((struct file_server_data *)req->user_ctx)->base_path);
printf("Directory: %s, start_fn: %d, found: %d\n", directory.c_str(), start_fn, found);
directory = directory.substr(start_fn, found - start_fn + 1);
printf("Directory danach: %s\n", directory.c_str());
printf("Directory danach 3: %s\n", directory.c_str());
directory = "/fileserver" + directory;
printf("Directory danach: %s\n", directory.c_str());
printf("Directory danach 4: %s\n", directory.c_str());
}

View File

@@ -111,6 +111,8 @@ esp_err_t set_content_type_from_file(httpd_req_t *req, const char *filename)
return httpd_resp_set_type(req, "image/jpeg");
} else if (IS_FILE_EXT(filename, ".ico")) {
return httpd_resp_set_type(req, "image/x-icon");
} else if (IS_FILE_EXT(filename, ".js")) {
return httpd_resp_set_type(req, "text/javascript");
}
/* This is a limited set only */
/* For any other type always set as plain text */

View File

@@ -130,11 +130,9 @@ bool ClassFlow::getNextLine(FILE* pfile, string *rt)
*rt = trim(*rt);
while ((zw[0] == ';' || zw[0] == '#' || (rt->size() == 0)) && !(zw[1] == '[')) // Kommentarzeilen (; oder #) und Leerzeilen überspringen, es sei denn es ist ein neuer auskommentierter Paragraph
{
*rt = "";
if (!fgets(zw, 1024, pfile))
{
*rt = "";
return false;
}
printf("%s", zw);
*rt = zw;
*rt = trim(*rt);

View File

@@ -1,487 +0,0 @@
#include "ClassFlowAnalog.h"
#include <math.h>
#include <iomanip>
#include <sys/types.h>
#include <sstream> // std::stringstream
// #define OHNETFLITE
#ifndef OHNETFLITE
#include "CTfLiteClass.h"
#endif
#include "ClassLogFile.h"
static const char* TAG = "flow_analog";
bool debugdetailanalog = false;
void ClassFlowAnalog::SetInitialParameter(void)
{
string cnnmodelfile = "";
modelxsize = 1;
modelysize = 1;
ListFlowControll = NULL;
previousElement = NULL;
SaveAllFiles = false;
disabled = false;
extendedResolution = false;
}
ClassFlowAnalog::ClassFlowAnalog(std::vector<ClassFlow*>* lfc) : ClassFlowImage(lfc, TAG)
{
SetInitialParameter();
ListFlowControll = lfc;
for (int i = 0; i < ListFlowControll->size(); ++i)
{
if (((*ListFlowControll)[i])->name().compare("ClassFlowAlignment") == 0)
{
flowpostalignment = (ClassFlowAlignment*) (*ListFlowControll)[i];
}
}
}
int ClassFlowAnalog::AnzahlROIs(int _analog = 0)
{
int zw = ANALOG[_analog]->ROI.size();
if (extendedResolution)
zw++;
return zw;
}
string ClassFlowAnalog::getReadout(int _analog = 0)
{
string result = "";
if (ANALOG[_analog]->ROI.size() == 0)
return result;
float zahl = ANALOG[_analog]->ROI[ANALOG[_analog]->ROI.size() - 1]->result;
int ergebnis_nachkomma = ((int) floor(zahl * 10)) % 10;
int prev = -1;
prev = ZeigerEval(ANALOG[_analog]->ROI[ANALOG[_analog]->ROI.size() - 1]->result, prev);
result = std::to_string(prev);
if (extendedResolution)
result = result + std::to_string(ergebnis_nachkomma);
for (int i = ANALOG[_analog]->ROI.size() - 2; i >= 0; --i)
{
prev = ZeigerEval(ANALOG[_analog]->ROI[i]->result, prev);
result = std::to_string(prev) + result;
}
return result;
}
int ClassFlowAnalog::ZeigerEval(float zahl, int ziffer_vorgaenger)
{
int ergebnis_nachkomma = ((int) floor(zahl * 10)) % 10;
int ergebnis_vorkomma = ((int) floor(zahl)) % 10;
int ergebnis, ergebnis_rating;
if (ziffer_vorgaenger == -1)
return ergebnis_vorkomma % 10;
ergebnis_rating = ergebnis_nachkomma - ziffer_vorgaenger;
if (ergebnis_nachkomma >= 5)
ergebnis_rating-=5;
else
ergebnis_rating+=5;
ergebnis = (int) round(zahl);
if (ergebnis_rating < 0)
ergebnis-=1;
if (ergebnis == -1)
ergebnis+=10;
ergebnis = ergebnis % 10;
return ergebnis;
}
bool ClassFlowAnalog::ReadParameter(FILE* pfile, string& aktparamgraph)
{
std::vector<string> zerlegt;
aktparamgraph = trim(aktparamgraph);
if (aktparamgraph.size() == 0)
if (!this->GetNextParagraph(pfile, aktparamgraph))
return false;
if ((aktparamgraph.compare("[Analog]") != 0) && (aktparamgraph.compare(";[Analog]") != 0)) // Paragraph passt nich zu MakeImage
return false;
if (aktparamgraph[0] == ';')
{
disabled = true;
while (getNextLine(pfile, &aktparamgraph) && !isNewParagraph(aktparamgraph));
printf("[Analog] is disabled !!!\n");
return true;
}
while (this->getNextLine(pfile, &aktparamgraph) && !this->isNewParagraph(aktparamgraph))
{
zerlegt = this->ZerlegeZeile(aktparamgraph);
if ((zerlegt[0] == "LogImageLocation") && (zerlegt.size() > 1))
{
this->LogImageLocation = "/sdcard" + zerlegt[1];
this->isLogImage = true;
}
if ((toUpper(zerlegt[0]) == "LOGFILERETENTIONINDAYS") && (zerlegt.size() > 1))
{
this->logfileRetentionInDays = std::stoi(zerlegt[1]);
}
if ((zerlegt[0] == "Model") && (zerlegt.size() > 1))
{
this->cnnmodelfile = zerlegt[1];
}
if ((zerlegt[0] == "ModelInputSize") && (zerlegt.size() > 2))
{
this->modelxsize = std::stoi(zerlegt[1]);
this->modelysize = std::stoi(zerlegt[2]);
}
if (zerlegt.size() >= 5)
{
analog* _analog = GetANALOG(zerlegt[0], true);
roianalog* neuroi = _analog->ROI[_analog->ROI.size()-1];
neuroi->posx = std::stoi(zerlegt[1]);
neuroi->posy = std::stoi(zerlegt[2]);
neuroi->deltax = std::stoi(zerlegt[3]);
neuroi->deltay = std::stoi(zerlegt[4]);
neuroi->result = -1;
neuroi->image = NULL;
neuroi->image_org = NULL;
// ROI.push_back(neuroi);
}
if ((toUpper(zerlegt[0]) == "SAVEALLFILES") && (zerlegt.size() > 1))
{
if (toUpper(zerlegt[1]) == "TRUE")
SaveAllFiles = true;
}
if ((toUpper(zerlegt[0]) == "EXTENDEDRESOLUTION") && (zerlegt.size() > 1))
{
if (toUpper(zerlegt[1]) == "TRUE")
extendedResolution = true;
}
}
for (int _ana = 0; _ana < ANALOG.size(); ++_ana)
for (int i = 0; i < ANALOG[_ana]->ROI.size(); ++i)
{
ANALOG[_ana]->ROI[i]->image = new CImageBasis(modelxsize, modelysize, 3);
ANALOG[_ana]->ROI[i]->image_org = new CImageBasis(ANALOG[_ana]->ROI[i]->deltax, ANALOG[_ana]->ROI[i]->deltay, 3);
}
return true;
}
analog* ClassFlowAnalog::FindANALOG(string _name_number)
{
for (int i = 0; i < ANALOG.size(); ++i)
{
if (ANALOG[i]->name == _name_number)
return ANALOG[i];
}
return NULL;
}
analog* ClassFlowAnalog::GetANALOG(string _name, bool _create = true)
{
string _analog, _roi;
int _pospunkt = _name.find_first_of(".");
// printf("Name: %s, Pospunkt: %d\n", _name.c_str(), _pospunkt);
if (_pospunkt > -1)
{
_analog = _name.substr(0, _pospunkt);
_roi = _name.substr(_pospunkt+1, _name.length() - _pospunkt - 1);
}
else
{
_analog = "default";
_roi = _name;
}
analog *_ret = NULL;
for (int i = 0; i < ANALOG.size(); ++i)
{
if (ANALOG[i]->name == _analog)
_ret = ANALOG[i];
}
if (!_create) // nicht gefunden und soll auch nicht erzeugt werden
return _ret;
if (_ret == NULL)
{
_ret = new analog;
_ret->name = _analog;
ANALOG.push_back(_ret);
}
roianalog* neuroi = new roianalog;
neuroi->name = _roi;
_ret->ROI.push_back(neuroi);
printf("GetANALOG - ANALOG %s - roi %s\n", _analog.c_str(), _roi.c_str());
return _ret;
}
string ClassFlowAnalog::getHTMLSingleStep(string host)
{
string result, zw;
std::vector<HTMLInfo*> htmlinfo;
result = "<p>Found ROIs: </p> <p><img src=\"" + host + "/img_tmp/alg_roi.jpg\"></p>\n";
result = result + "Analog Pointers: <p> ";
htmlinfo = GetHTMLInfo();
for (int i = 0; i < htmlinfo.size(); ++i)
{
std::stringstream stream;
stream << std::fixed << std::setprecision(1) << htmlinfo[i]->val;
zw = stream.str();
result = result + "<img src=\"" + host + "/img_tmp/" + htmlinfo[i]->filename + "\"> " + zw;
delete htmlinfo[i];
}
htmlinfo.clear();
return result;
}
bool ClassFlowAnalog::doFlow(string time)
{
if (disabled)
return true;
if (!doAlignAndCut(time)){
return false;
};
if (debugdetailanalog) LogFile.WriteToFile("ClassFlowAnalog::doFlow nach Alignment");
doNeuralNetwork(time);
RemoveOldLogs();
return true;
}
bool ClassFlowAnalog::doAlignAndCut(string time)
{
if (disabled)
return true;
CAlignAndCutImage *caic = flowpostalignment->GetAlignAndCutImage();
for (int _ana = 0; _ana < ANALOG.size(); ++_ana)
for (int i = 0; i < ANALOG[_ana]->ROI.size(); ++i)
{
printf("Analog %d - Align&Cut\n", i);
caic->CutAndSave(ANALOG[_ana]->ROI[i]->posx, ANALOG[_ana]->ROI[i]->posy, ANALOG[_ana]->ROI[i]->deltax, ANALOG[_ana]->ROI[i]->deltay, ANALOG[_ana]->ROI[i]->image_org);
if (SaveAllFiles)
{
if (ANALOG[_ana]->name == "default")
ANALOG[_ana]->ROI[i]->image_org->SaveToFile(FormatFileName("/sdcard/img_tmp/" + ANALOG[_ana]->ROI[i]->name + ".jpg"));
else
ANALOG[_ana]->ROI[i]->image_org->SaveToFile(FormatFileName("/sdcard/img_tmp/" + ANALOG[_ana]->name + "_" + ANALOG[_ana]->ROI[i]->name + ".jpg"));
}
ANALOG[_ana]->ROI[i]->image_org->Resize(modelxsize, modelysize, ANALOG[_ana]->ROI[i]->image);
if (SaveAllFiles)
{
if (ANALOG[_ana]->name == "default")
ANALOG[_ana]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + ANALOG[_ana]->ROI[i]->name + ".bmp"));
else
ANALOG[_ana]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + ANALOG[_ana]->name + "_" + ANALOG[_ana]->ROI[i]->name + ".bmp"));
}
}
return true;
}
void ClassFlowAnalog::DrawROI(CImageBasis *_zw)
{
int r = 0;
int g = 255;
int b = 0;
for (int _ana = 0; _ana < ANALOG.size(); ++_ana)
for (int i = 0; i < ANALOG[_ana]->ROI.size(); ++i)
{
_zw->drawRect(ANALOG[_ana]->ROI[i]->posx, ANALOG[_ana]->ROI[i]->posy, ANALOG[_ana]->ROI[i]->deltax, ANALOG[_ana]->ROI[i]->deltay, r, g, b, 1);
_zw->drawCircle((int) (ANALOG[_ana]->ROI[i]->posx + ANALOG[_ana]->ROI[i]->deltax/2), (int) (ANALOG[_ana]->ROI[i]->posy + ANALOG[_ana]->ROI[i]->deltay/2), (int) (ANALOG[_ana]->ROI[i]->deltax/2), r, g, b, 2);
_zw->drawLine((int) (ANALOG[_ana]->ROI[i]->posx + ANALOG[_ana]->ROI[i]->deltax/2), (int) ANALOG[_ana]->ROI[i]->posy, (int) (ANALOG[_ana]->ROI[i]->posx + ANALOG[_ana]->ROI[i]->deltax/2), (int) (ANALOG[_ana]->ROI[i]->posy + ANALOG[_ana]->ROI[i]->deltay), r, g, b, 2);
_zw->drawLine((int) ANALOG[_ana]->ROI[i]->posx, (int) (ANALOG[_ana]->ROI[i]->posy + ANALOG[_ana]->ROI[i]->deltay/2), (int) ANALOG[_ana]->ROI[i]->posx + ANALOG[_ana]->ROI[i]->deltax, (int) (ANALOG[_ana]->ROI[i]->posy + ANALOG[_ana]->ROI[i]->deltay/2), r, g, b, 2);
}
}
bool ClassFlowAnalog::doNeuralNetwork(string time)
{
if (disabled)
return true;
string logPath = CreateLogFolder(time);
string input = "/sdcard/img_tmp/alg.jpg";
string ioresize = "/sdcard/img_tmp/resize.bmp";
string output;
input = FormatFileName(input);
#ifndef OHNETFLITE
CTfLiteClass *tflite = new CTfLiteClass;
string zwcnn = "/sdcard" + cnnmodelfile;
zwcnn = FormatFileName(zwcnn);
printf(zwcnn.c_str());printf("\n");
if (!tflite->LoadModel(zwcnn)) {
printf("Can't read model file /sdcard%s\n", cnnmodelfile.c_str());
delete tflite;
return false;
}
tflite->MakeAllocate();
#endif
for (int _ana = 0; _ana < ANALOG.size(); ++_ana)
{
for (int i = 0; i < ANALOG[_ana]->ROI.size(); ++i)
{
printf("Analog %d - TfLite\n", i);
float f1, f2;
f1 = 0; f2 = 0;
#ifndef OHNETFLITE
tflite->LoadInputImageBasis(ANALOG[_ana]->ROI[i]->image);
tflite->Invoke();
if (debugdetailanalog) LogFile.WriteToFile("Nach Invoke");
f1 = tflite->GetOutputValue(0);
f2 = tflite->GetOutputValue(1);
#endif
float result = fmod(atan2(f1, f2) / (M_PI * 2) + 2, 1);
// printf("Result sin, cos, ziffer: %f, %f, %f\n", f1, f2, result);
ANALOG[_ana]->ROI[i]->result = result * 10;
printf("Result Analog%i: %f\n", i, ANALOG[_ana]->ROI[i]->result);
if (isLogImage)
{
LogImage(logPath, ANALOG[_ana]->ROI[i]->name, &ANALOG[_ana]->ROI[i]->result, NULL, time, ANALOG[_ana]->ROI[i]->image_org);
}
}
}
#ifndef OHNETFLITE
delete tflite;
#endif
return true;
}
std::vector<HTMLInfo*> ClassFlowAnalog::GetHTMLInfo()
{
std::vector<HTMLInfo*> result;
for (int _ana = 0; _ana < ANALOG.size(); ++_ana)
for (int i = 0; i < ANALOG[_ana]->ROI.size(); ++i)
{
if (ANALOG[_ana]->name == "default")
ANALOG[_ana]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + ANALOG[_ana]->ROI[i]->name + ".bmp"));
else
ANALOG[_ana]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + ANALOG[_ana]->name + "_" + ANALOG[_ana]->ROI[i]->name + ".bmp"));
HTMLInfo *zw = new HTMLInfo;
if (ANALOG[_ana]->name == "default")
{
zw->filename = ANALOG[_ana]->ROI[i]->name + ".bmp";
zw->filename_org = ANALOG[_ana]->ROI[i]->name + ".jpg";
}
else
{
zw->filename = ANALOG[_ana]->name + "_" + ANALOG[_ana]->ROI[i]->name + ".bmp";
zw->filename_org = ANALOG[_ana]->name + "_" + ANALOG[_ana]->ROI[i]->name + ".jpg";
}
zw->val = ANALOG[_ana]->ROI[i]->result;
zw->image = ANALOG[_ana]->ROI[i]->image;
zw->image_org = ANALOG[_ana]->ROI[i]->image_org;
result.push_back(zw);
}
return result;
}
int ClassFlowAnalog::getAnzahlANALOG()
{
return ANALOG.size();
}
string ClassFlowAnalog::getNameANALOG(int _analog)
{
if (_analog < ANALOG.size())
return ANALOG[_analog]->name;
return "ANALOG DOES NOT EXIST";
}
analog* ClassFlowAnalog::GetANALOG(int _analog)
{
if (_analog < ANALOG.size())
return ANALOG[_analog];
return NULL;
}
void ClassFlowAnalog::UpdateNameNumbers(std::vector<std::string> *_name_numbers)
{
for (int _dig = 0; _dig < ANALOG.size(); _dig++)
{
std::string _name = ANALOG[_dig]->name;
bool found = false;
for (int i = 0; i < (*_name_numbers).size(); ++i)
{
if ((*_name_numbers)[i] == _name)
found = true;
}
if (!found)
(*_name_numbers).push_back(_name);
}
}

View File

@@ -1,65 +0,0 @@
#pragma once
#include "ClassFlowImage.h"
#include "ClassFlowAlignment.h"
// #include "CTfLiteClass.h"
struct roianalog {
int posx, posy, deltax, deltay;
float result;
CImageBasis *image, *image_org;
string name;
};
struct analog {
string name;
std::vector<roianalog*> ROI;
};
class ClassFlowAnalog :
public ClassFlowImage
{
protected:
// std::vector<roianalog*> ROI;
std::vector<analog*> ANALOG;
string cnnmodelfile;
int modelxsize, modelysize;
int ZeigerEval(float zahl, int ziffer_vorgaenger);
bool SaveAllFiles;
ClassFlowAlignment* flowpostalignment;
void SetInitialParameter(void);
public:
bool extendedResolution;
ClassFlowAnalog(std::vector<ClassFlow*>* lfc);
bool ReadParameter(FILE* pfile, string& aktparamgraph);
bool doFlow(string time);
string getHTMLSingleStep(string host);
string getReadout(int _analog);
void DrawROI(CImageBasis *_zw);
bool doNeuralNetwork(string time);
bool doAlignAndCut(string time);
std::vector<HTMLInfo*> GetHTMLInfo();
int AnzahlROIs(int _analog);
int getAnzahlANALOG();
analog* GetANALOG(int _analog);
analog* GetANALOG(string _name, bool _create);
analog* FindANALOG(string _name_number);
string getNameANALOG(int _analog);
void UpdateNameNumbers(std::vector<std::string> *_name_numbers);
string name(){return "ClassFlowAnalog";};
};

View File

@@ -0,0 +1,662 @@
#include "ClassFlowCNNGeneral.h"
#include <math.h>
#include <iomanip>
#include <sys/types.h>
#include <sstream> // std::stringstream
#include "CTfLiteClass.h"
#include "ClassLogFile.h"
static const char* TAG = "flow_analog";
bool debugdetailgeneral = false;
ClassFlowCNNGeneral::ClassFlowCNNGeneral(ClassFlowAlignment *_flowalign, t_CNNType _cnntype) : ClassFlowImage(NULL, TAG)
{
string cnnmodelfile = "";
modelxsize = 1;
modelysize = 1;
ListFlowControll = NULL;
previousElement = NULL;
SaveAllFiles = false;
disabled = false;
// extendedResolution = false;
isLogImageSelect = false;
CNNType = AutoDetect;
CNNType = _cnntype;
flowpostalignment = _flowalign;
}
/*
int ClassFlowCNNGeneral::AnzahlROIs(int _analog = 0)
{
int zw = GENERAL[_analog]->ROI.size();
if (extendedResolution && (CNNType != Digital)) zw++; // da letzte Ziffer inkl Nachhkomma, es sei denn, das Nachkomma gibt es nicht (Digital)
return zw;
}
*/
string ClassFlowCNNGeneral::getReadout(int _analog = 0, bool _extendedResolution = false)
{
string result = "";
if (GENERAL[_analog]->ROI.size() == 0)
return result;
if (CNNType == Analogue)
{
float zahl = GENERAL[_analog]->ROI[GENERAL[_analog]->ROI.size() - 1]->result_float;
int ergebnis_nachkomma = ((int) floor(zahl * 10) + 10) % 10;
int prev = -1;
prev = ZeigerEval(GENERAL[_analog]->ROI[GENERAL[_analog]->ROI.size() - 1]->result_float, prev);
result = std::to_string(prev);
if (_extendedResolution && (CNNType != Digital))
result = result + std::to_string(ergebnis_nachkomma);
for (int i = GENERAL[_analog]->ROI.size() - 2; i >= 0; --i)
{
prev = ZeigerEval(GENERAL[_analog]->ROI[i]->result_float, prev);
result = std::to_string(prev) + result;
}
}
if (CNNType == Digital)
{
for (int i = 0; i < GENERAL[_analog]->ROI.size(); ++i)
{
if (GENERAL[_analog]->ROI[i]->result_klasse >= 10)
result = result + "N";
else
result = result + std::to_string(GENERAL[_analog]->ROI[i]->result_klasse);
}
}
if (CNNType == DigitalHyprid)
{
// int ergebnis_nachkomma = -1;
int zif_akt = -1;
float zahl = GENERAL[_analog]->ROI[GENERAL[_analog]->ROI.size() - 1]->result_float;
if (zahl >= 0) // NaN?
{
if (_extendedResolution)
{
int ergebnis_nachkomma = ((int) floor(zahl * 10)) % 10;
int ergebnis_vorkomma = ((int) floor(zahl)) % 10;
result = std::to_string(ergebnis_vorkomma) + std::to_string(ergebnis_nachkomma);
zif_akt = ergebnis_vorkomma;
}
else
{
zif_akt = ZeigerEvalHybrid(GENERAL[_analog]->ROI[GENERAL[_analog]->ROI.size() - 1]->result_float, -1, -1);
result = std::to_string(zif_akt);
}
}
else
{
result = "N";
if (_extendedResolution && (CNNType != Digital))
result = "NN";
}
for (int i = GENERAL[_analog]->ROI.size() - 2; i >= 0; --i)
{
if (GENERAL[_analog]->ROI[i]->result_float >= 0)
{
zif_akt = ZeigerEvalHybrid(GENERAL[_analog]->ROI[i]->result_float, GENERAL[_analog]->ROI[i+1]->result_float, zif_akt);
result = std::to_string(zif_akt) + result;
}
else
{
zif_akt = -1;
result = "N" + result;
}
}
}
return result;
}
int ClassFlowCNNGeneral::ZeigerEvalHybrid(float zahl, float zahl_vorgaenger, int eval_vorgaenger)
{
int ergebnis_nachkomma = ((int) floor(zahl * 10)) % 10;
// int ergebnis_vorkomma = ((int) floor(zahl)) % 10;
if (zahl_vorgaenger < 0) // keine Vorzahl vorhanden !!! --> Runde die Zahl
{
if ((ergebnis_nachkomma <= 2) || (ergebnis_nachkomma >= 8)) // Band um die Ziffer --> Runden, da Ziffer im Rahmen Ungenauigkeit erreicht
return ((int) round(zahl) + 10) % 10;
else
return ((int) trunc(zahl) + 10) % 10;
}
if (zahl_vorgaenger > 9.2) // Ziffernwechsel beginnt
{
if (eval_vorgaenger == 0) // Wechsel hat schon stattgefunden
{
return ((int) round(zahl) + 10) % 10; // Annahme, dass die neue Zahl schon in der Nähe des Ziels ist
}
else
{
if (zahl_vorgaenger <= 9.5) // Wechsel startet gerade, aber beginnt erst
{
if ((ergebnis_nachkomma <= 2) || (ergebnis_nachkomma >= 8)) // Band um die Ziffer --> Runden, da Ziffer im Rahmen Ungenauigkeit erreicht
return ((int) round(zahl) + 10) % 10;
else
return ((int) trunc(zahl) + 10) % 10;
}
else
{
return ((int) trunc(zahl) + 10) % 10; // Wechsel schon weiter fortgeschritten, d.h. über 2 als Nachkomma
}
}
}
if ((ergebnis_nachkomma <= 2) || (ergebnis_nachkomma >= 8)) // Band um die Ziffer --> Runden, da Ziffer im Rahmen Ungenauigkeit erreicht
return ((int) round(zahl) + 10) % 10;
return ((int) trunc(zahl) + 10) % 10;
}
int ClassFlowCNNGeneral::ZeigerEval(float zahl, int ziffer_vorgaenger)
{
int ergebnis_nachkomma = ((int) floor(zahl * 10) + 10) % 10;
int ergebnis_vorkomma = ((int) floor(zahl) + 10) % 10;
int ergebnis, ergebnis_rating;
if (ziffer_vorgaenger == -1)
return ergebnis_vorkomma % 10;
ergebnis_rating = ergebnis_nachkomma - ziffer_vorgaenger;
if (ergebnis_nachkomma >= 5)
ergebnis_rating-=5;
else
ergebnis_rating+=5;
ergebnis = (int) round(zahl);
if (ergebnis_rating < 0)
ergebnis-=1;
if (ergebnis == -1)
ergebnis+=10;
ergebnis = (ergebnis + 10) % 10;
return ergebnis;
}
bool ClassFlowCNNGeneral::ReadParameter(FILE* pfile, string& aktparamgraph)
{
std::vector<string> zerlegt;
aktparamgraph = trim(aktparamgraph);
if (aktparamgraph.size() == 0)
if (!this->GetNextParagraph(pfile, aktparamgraph))
return false;
if ((toUpper(aktparamgraph) != "[ANALOG]") && (toUpper(aktparamgraph) != ";[ANALOG]")
&& (toUpper(aktparamgraph) != "[DIGIT]") && (toUpper(aktparamgraph) != ";[DIGIT]")
&& (toUpper(aktparamgraph) != "[DIGITS]") && (toUpper(aktparamgraph) != ";[DIGITS]")
) // Paragraph passt nicht
return false;
/*
if ((aktparamgraph.compare("[Analog]") != 0) && (aktparamgraph.compare(";[Analog]") != 0)
&& (aktparamgraph.compare("[Digit]") != 0) && (aktparamgraph.compare(";[Digit]"))) // Paragraph passt nicht
return false;
*/
if (aktparamgraph[0] == ';')
{
disabled = true;
while (getNextLine(pfile, &aktparamgraph) && !isNewParagraph(aktparamgraph));
printf("[Analog/Digit] is disabled !!!\n");
return true;
}
while (this->getNextLine(pfile, &aktparamgraph) && !this->isNewParagraph(aktparamgraph))
{
zerlegt = this->ZerlegeZeile(aktparamgraph);
if ((zerlegt[0] == "LogImageLocation") && (zerlegt.size() > 1))
{
this->LogImageLocation = "/sdcard" + zerlegt[1];
this->isLogImage = true;
}
if ((zerlegt[0] == "LogImageSelect") && (zerlegt.size() > 1))
{
LogImageSelect = zerlegt[1];
isLogImageSelect = true;
}
if ((toUpper(zerlegt[0]) == "LOGFILERETENTIONINDAYS") && (zerlegt.size() > 1))
{
this->logfileRetentionInDays = std::stoi(zerlegt[1]);
}
if ((toUpper(zerlegt[0]) == "MODELTYPE") && (zerlegt.size() > 1))
{
if (toUpper(zerlegt[1]) == "DIGITHYPRID")
CNNType = DigitalHyprid;
}
if ((zerlegt[0] == "Model") && (zerlegt.size() > 1))
{
this->cnnmodelfile = zerlegt[1];
}
if ((zerlegt[0] == "ModelInputSize") && (zerlegt.size() > 2))
{
this->modelxsize = std::stoi(zerlegt[1]);
this->modelysize = std::stoi(zerlegt[2]);
}
if (zerlegt.size() >= 5)
{
general* _analog = GetGENERAL(zerlegt[0], true);
roi* neuroi = _analog->ROI[_analog->ROI.size()-1];
neuroi->posx = std::stoi(zerlegt[1]);
neuroi->posy = std::stoi(zerlegt[2]);
neuroi->deltax = std::stoi(zerlegt[3]);
neuroi->deltay = std::stoi(zerlegt[4]);
neuroi->result_float = -1;
neuroi->image = NULL;
neuroi->image_org = NULL;
}
if ((toUpper(zerlegt[0]) == "SAVEALLFILES") && (zerlegt.size() > 1))
{
if (toUpper(zerlegt[1]) == "TRUE")
SaveAllFiles = true;
}
/*
if ((toUpper(zerlegt[0]) == "EXTENDEDRESOLUTION") && (zerlegt.size() > 1))
{
if (toUpper(zerlegt[1]) == "TRUE")
extendedResolution = true;
}
*/
}
for (int _ana = 0; _ana < GENERAL.size(); ++_ana)
for (int i = 0; i < GENERAL[_ana]->ROI.size(); ++i)
{
GENERAL[_ana]->ROI[i]->image = new CImageBasis(modelxsize, modelysize, 3);
GENERAL[_ana]->ROI[i]->image_org = new CImageBasis(GENERAL[_ana]->ROI[i]->deltax, GENERAL[_ana]->ROI[i]->deltay, 3);
}
return true;
}
general* ClassFlowCNNGeneral::FindGENERAL(string _name_number)
{
for (int i = 0; i < GENERAL.size(); ++i)
if (GENERAL[i]->name == _name_number)
return GENERAL[i];
return NULL;
}
general* ClassFlowCNNGeneral::GetGENERAL(string _name, bool _create = true)
{
string _analog, _roi;
int _pospunkt = _name.find_first_of(".");
if (_pospunkt > -1)
{
_analog = _name.substr(0, _pospunkt);
_roi = _name.substr(_pospunkt+1, _name.length() - _pospunkt - 1);
}
else
{
_analog = "default";
_roi = _name;
}
general *_ret = NULL;
for (int i = 0; i < GENERAL.size(); ++i)
if (GENERAL[i]->name == _analog)
_ret = GENERAL[i];
if (!_create) // nicht gefunden und soll auch nicht erzeugt werden
return _ret;
if (_ret == NULL)
{
_ret = new general;
_ret->name = _analog;
GENERAL.push_back(_ret);
}
roi* neuroi = new roi;
neuroi->name = _roi;
_ret->ROI.push_back(neuroi);
printf("GetGENERAL - GENERAL %s - roi %s\n", _analog.c_str(), _roi.c_str());
return _ret;
}
string ClassFlowCNNGeneral::getHTMLSingleStep(string host)
{
string result, zw;
std::vector<HTMLInfo*> htmlinfo;
result = "<p>Found ROIs: </p> <p><img src=\"" + host + "/img_tmp/alg_roi.jpg\"></p>\n";
result = result + "Analog Pointers: <p> ";
htmlinfo = GetHTMLInfo();
for (int i = 0; i < htmlinfo.size(); ++i)
{
std::stringstream stream;
stream << std::fixed << std::setprecision(1) << htmlinfo[i]->val;
zw = stream.str();
result = result + "<img src=\"" + host + "/img_tmp/" + htmlinfo[i]->filename + "\"> " + zw;
delete htmlinfo[i];
}
htmlinfo.clear();
return result;
}
bool ClassFlowCNNGeneral::doFlow(string time)
{
if (disabled)
return true;
if (!doAlignAndCut(time)){
return false;
};
if (debugdetailgeneral) LogFile.WriteToFile("ClassFlowCNNGeneral::doFlow nach Alignment");
doNeuralNetwork(time);
RemoveOldLogs();
return true;
}
bool ClassFlowCNNGeneral::doAlignAndCut(string time)
{
if (disabled)
return true;
CAlignAndCutImage *caic = flowpostalignment->GetAlignAndCutImage();
for (int _ana = 0; _ana < GENERAL.size(); ++_ana)
for (int i = 0; i < GENERAL[_ana]->ROI.size(); ++i)
{
printf("General %d - Align&Cut\n", i);
caic->CutAndSave(GENERAL[_ana]->ROI[i]->posx, GENERAL[_ana]->ROI[i]->posy, GENERAL[_ana]->ROI[i]->deltax, GENERAL[_ana]->ROI[i]->deltay, GENERAL[_ana]->ROI[i]->image_org);
if (SaveAllFiles)
{
if (GENERAL[_ana]->name == "default")
GENERAL[_ana]->ROI[i]->image_org->SaveToFile(FormatFileName("/sdcard/img_tmp/" + GENERAL[_ana]->ROI[i]->name + ".jpg"));
else
GENERAL[_ana]->ROI[i]->image_org->SaveToFile(FormatFileName("/sdcard/img_tmp/" + GENERAL[_ana]->name + "_" + GENERAL[_ana]->ROI[i]->name + ".jpg"));
}
GENERAL[_ana]->ROI[i]->image_org->Resize(modelxsize, modelysize, GENERAL[_ana]->ROI[i]->image);
if (SaveAllFiles)
{
if (GENERAL[_ana]->name == "default")
GENERAL[_ana]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + GENERAL[_ana]->ROI[i]->name + ".bmp"));
else
GENERAL[_ana]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + GENERAL[_ana]->name + "_" + GENERAL[_ana]->ROI[i]->name + ".bmp"));
}
}
return true;
}
void ClassFlowCNNGeneral::DrawROI(CImageBasis *_zw)
{
if (CNNType == Analogue)
{
int r = 0;
int g = 255;
int b = 0;
for (int _ana = 0; _ana < GENERAL.size(); ++_ana)
for (int i = 0; i < GENERAL[_ana]->ROI.size(); ++i)
{
_zw->drawRect(GENERAL[_ana]->ROI[i]->posx, GENERAL[_ana]->ROI[i]->posy, GENERAL[_ana]->ROI[i]->deltax, GENERAL[_ana]->ROI[i]->deltay, r, g, b, 1);
_zw->drawCircle((int) (GENERAL[_ana]->ROI[i]->posx + GENERAL[_ana]->ROI[i]->deltax/2), (int) (GENERAL[_ana]->ROI[i]->posy + GENERAL[_ana]->ROI[i]->deltay/2), (int) (GENERAL[_ana]->ROI[i]->deltax/2), r, g, b, 2);
_zw->drawLine((int) (GENERAL[_ana]->ROI[i]->posx + GENERAL[_ana]->ROI[i]->deltax/2), (int) GENERAL[_ana]->ROI[i]->posy, (int) (GENERAL[_ana]->ROI[i]->posx + GENERAL[_ana]->ROI[i]->deltax/2), (int) (GENERAL[_ana]->ROI[i]->posy + GENERAL[_ana]->ROI[i]->deltay), r, g, b, 2);
_zw->drawLine((int) GENERAL[_ana]->ROI[i]->posx, (int) (GENERAL[_ana]->ROI[i]->posy + GENERAL[_ana]->ROI[i]->deltay/2), (int) GENERAL[_ana]->ROI[i]->posx + GENERAL[_ana]->ROI[i]->deltax, (int) (GENERAL[_ana]->ROI[i]->posy + GENERAL[_ana]->ROI[i]->deltay/2), r, g, b, 2);
}
}
else
{
for (int _dig = 0; _dig < GENERAL.size(); ++_dig)
for (int i = 0; i < GENERAL[_dig]->ROI.size(); ++i)
_zw->drawRect(GENERAL[_dig]->ROI[i]->posx, GENERAL[_dig]->ROI[i]->posy, GENERAL[_dig]->ROI[i]->deltax, GENERAL[_dig]->ROI[i]->deltay, 0, 0, (255 - _dig*100), 2);
}
}
bool ClassFlowCNNGeneral::doNeuralNetwork(string time)
{
if (disabled)
return true;
string logPath = CreateLogFolder(time);
CTfLiteClass *tflite = new CTfLiteClass;
string zwcnn = "/sdcard" + cnnmodelfile;
zwcnn = FormatFileName(zwcnn);
printf(zwcnn.c_str());printf("\n");
if (!tflite->LoadModel(zwcnn)) {
printf("Can't read model file /sdcard%s\n", cnnmodelfile.c_str());
LogFile.WriteToFile("Cannot load model");
delete tflite;
return false;
}
tflite->MakeAllocate();
if (CNNType == AutoDetect)
{
int _anzoutputdimensions = tflite->GetAnzOutPut();
switch (_anzoutputdimensions)
{
case 2:
CNNType = Analogue;
printf("TFlite-Type set to Analogue\n");
break;
case 11:
CNNType = Digital;
printf("TFlite-Type set to Digital\n");
break;
case 22:
CNNType = DigitalHyprid;
printf("TFlite-Type set to DigitalHyprid\n");
break;
default:
printf("ERROR ERROR ERROR - tflite passt nicht zur Firmware - ERROR ERROR ERROR\n");
}
// flowpostprocessing->UpdateNachkommaDecimalShift();
}
for (int _ana = 0; _ana < GENERAL.size(); ++_ana)
{
for (int i = 0; i < GENERAL[_ana]->ROI.size(); ++i)
{
printf("General %d - TfLite\n", i);
switch (CNNType) {
case Analogue:
{
float f1, f2;
f1 = 0; f2 = 0;
tflite->LoadInputImageBasis(GENERAL[_ana]->ROI[i]->image);
tflite->Invoke();
if (debugdetailgeneral) LogFile.WriteToFile("Nach Invoke");
f1 = tflite->GetOutputValue(0);
f2 = tflite->GetOutputValue(1);
float result = fmod(atan2(f1, f2) / (M_PI * 2) + 2, 1);
GENERAL[_ana]->ROI[i]->result_float = result * 10;
printf("Result General(Analog)%i: %f\n", i, GENERAL[_ana]->ROI[i]->result_float);
if (isLogImage)
LogImage(logPath, GENERAL[_ana]->ROI[i]->name, &GENERAL[_ana]->ROI[i]->result_float, NULL, time, GENERAL[_ana]->ROI[i]->image_org);
} break;
case Digital:
{
GENERAL[_ana]->ROI[i]->result_klasse = 0;
GENERAL[_ana]->ROI[i]->result_klasse = tflite->GetClassFromImageBasis(GENERAL[_ana]->ROI[i]->image);
printf("Result General(Digit)%i: %d\n", i, GENERAL[_ana]->ROI[i]->result_klasse);
if (isLogImage)
{
if (isLogImageSelect)
{
if (LogImageSelect.find(GENERAL[_ana]->ROI[i]->name) != std::string::npos)
LogImage(logPath, GENERAL[_ana]->ROI[i]->name, NULL, &GENERAL[_ana]->ROI[i]->result_klasse, time, GENERAL[_ana]->ROI[i]->image_org);
}
else
{
LogImage(logPath, GENERAL[_ana]->ROI[i]->name, NULL, &GENERAL[_ana]->ROI[i]->result_klasse, time, GENERAL[_ana]->ROI[i]->image_org);
}
}
} break;
case DigitalHyprid:
{
int _num, _nachkomma;
tflite->LoadInputImageBasis(GENERAL[_ana]->ROI[i]->image);
tflite->Invoke();
if (debugdetailgeneral) LogFile.WriteToFile("Nach Invoke");
_num = tflite->GetOutClassification(0, 10);
_nachkomma = tflite->GetOutClassification(11, 22);
string _zwres = "Nach Invoke - Nummer: " + to_string(_num) + " Nachkomma: " + to_string(_nachkomma);
if (debugdetailgeneral) LogFile.WriteToFile(_zwres);
if ((_num == 10) || (_nachkomma == 10)) // NaN detektiert
GENERAL[_ana]->ROI[i]->result_float = -1;
else
GENERAL[_ana]->ROI[i]->result_float = fmod((double) _num + (((double)_nachkomma)-5)/10 + (double) 10, 10);
printf("Result General(DigitalHyprid)%i: %f\n", i, GENERAL[_ana]->ROI[i]->result_float);
_zwres = "Result General(DigitalHyprid)" + to_string(i) + ": " + to_string(GENERAL[_ana]->ROI[i]->result_float);
if (debugdetailgeneral) LogFile.WriteToFile(_zwres);
if (isLogImage)
LogImage(logPath, GENERAL[_ana]->ROI[i]->name, &GENERAL[_ana]->ROI[i]->result_float, NULL, time, GENERAL[_ana]->ROI[i]->image_org);
} break;
default:
break;
}
}
}
delete tflite;
return true;
}
bool ClassFlowCNNGeneral::isExtendedResolution(int _number)
{
// if (extendedResolution && !(CNNType == Digital))
if (!(CNNType == Digital))
return true;
return false;
}
std::vector<HTMLInfo*> ClassFlowCNNGeneral::GetHTMLInfo()
{
std::vector<HTMLInfo*> result;
for (int _ana = 0; _ana < GENERAL.size(); ++_ana)
for (int i = 0; i < GENERAL[_ana]->ROI.size(); ++i)
{
if (GENERAL[_ana]->name == "default")
GENERAL[_ana]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + GENERAL[_ana]->ROI[i]->name + ".bmp"));
else
GENERAL[_ana]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + GENERAL[_ana]->name + "_" + GENERAL[_ana]->ROI[i]->name + ".bmp"));
HTMLInfo *zw = new HTMLInfo;
if (GENERAL[_ana]->name == "default")
{
zw->filename = GENERAL[_ana]->ROI[i]->name + ".bmp";
zw->filename_org = GENERAL[_ana]->ROI[i]->name + ".jpg";
}
else
{
zw->filename = GENERAL[_ana]->name + "_" + GENERAL[_ana]->ROI[i]->name + ".bmp";
zw->filename_org = GENERAL[_ana]->name + "_" + GENERAL[_ana]->ROI[i]->name + ".jpg";
}
if (CNNType == Digital)
zw->val = GENERAL[_ana]->ROI[i]->result_klasse;
else
zw->val = GENERAL[_ana]->ROI[i]->result_float;
zw->image = GENERAL[_ana]->ROI[i]->image;
zw->image_org = GENERAL[_ana]->ROI[i]->image_org;
// printf("Push %s\n", zw->filename.c_str());
result.push_back(zw);
}
// printf("größe: %d\n", result.size());
return result;
}
int ClassFlowCNNGeneral::getAnzahlGENERAL()
{
return GENERAL.size();
}
string ClassFlowCNNGeneral::getNameGENERAL(int _analog)
{
if (_analog < GENERAL.size())
return GENERAL[_analog]->name;
return "GENERAL DOES NOT EXIST";
}
general* ClassFlowCNNGeneral::GetGENERAL(int _analog)
{
if (_analog < GENERAL.size())
return GENERAL[_analog];
return NULL;
}
void ClassFlowCNNGeneral::UpdateNameNumbers(std::vector<std::string> *_name_numbers)
{
for (int _dig = 0; _dig < GENERAL.size(); _dig++)
{
std::string _name = GENERAL[_dig]->name;
bool found = false;
for (int i = 0; i < (*_name_numbers).size(); ++i)
{
if ((*_name_numbers)[i] == _name)
found = true;
}
if (!found)
(*_name_numbers).push_back(_name);
}
}

View File

@@ -0,0 +1,72 @@
#ifndef __CLASSCNNGENERAL__
#define __CLASSCNNGENERAL__
#include"ClassFlowDefineTypes.h"
#include "ClassFlowAlignment.h"
// #include "ClassFlowPostProcessing.h"
enum t_CNNType {
AutoDetect,
Analogue,
Digital,
DigitalHyprid,
None
};
class ClassFlowCNNGeneral :
public ClassFlowImage
{
protected:
t_CNNType CNNType;
std::vector<general*> GENERAL;
string cnnmodelfile;
int modelxsize, modelysize;
bool isLogImageSelect;
string LogImageSelect;
ClassFlowAlignment* flowpostalignment;
// ClassFlowPostProcessing *flowpostprocessing = NULL;
bool SaveAllFiles;
// bool extendedResolution;
int ZeigerEval(float zahl, int ziffer_vorgaenger);
int ZeigerEvalHybrid(float zahl, float zahl_vorgaenger, int eval_vorgaenger);
bool doNeuralNetwork(string time);
bool doAlignAndCut(string time);
public:
ClassFlowCNNGeneral(ClassFlowAlignment *_flowalign, t_CNNType _cnntype = AutoDetect);
bool ReadParameter(FILE* pfile, string& aktparamgraph);
bool doFlow(string time);
string getHTMLSingleStep(string host);
string getReadout(int _analog, bool _extendedResolution);
void DrawROI(CImageBasis *_zw);
std::vector<HTMLInfo*> GetHTMLInfo();
// int AnzahlROIs(int _analog);
int getAnzahlGENERAL();
general* GetGENERAL(int _analog);
general* GetGENERAL(string _name, bool _create);
general* FindGENERAL(string _name_number);
string getNameGENERAL(int _analog);
bool isExtendedResolution(int _number = 0);
// void setPostprocessing(ClassFlowPostProcessing *_fpp){flowpostprocessing = _fpp;};
void UpdateNameNumbers(std::vector<std::string> *_name_numbers);
t_CNNType getCNNType(){return CNNType;};
string name(){return "ClassFlowCNNGeneral";};
};
#endif

View File

@@ -12,6 +12,9 @@
#include "Helper.h"
#include "server_ota.h"
//#include "CImg.h"
#include "server_help.h"
//#define DEBUG_DETAIL_ON
@@ -32,10 +35,10 @@ std::string ClassFlowControll::doSingleStep(std::string _stepname, std::string _
if ((_stepname.compare(0, 7, "[Digits") == 0) || (_stepname.compare(0, 8, ";[Digits") == 0)) {
// if ((_stepname.compare("[Digits]") == 0) || (_stepname.compare(";[Digits]") == 0)){
// printf("Digits!!!\n");
_classname = "ClassFlowDigit";
_classname = "ClassFlowCNNGeneral";
}
if ((_stepname.compare("[Analog]") == 0) || (_stepname.compare(";[Analog]") == 0)){
_classname = "ClassFlowAnalog";
_classname = "ClassFlowCNNGeneral";
}
if ((_stepname.compare("[MQTT]") == 0) || (_stepname.compare(";[MQTT]") == 0)){
_classname = "ClassFlowMQTT";
@@ -51,11 +54,33 @@ std::string ClassFlowControll::doSingleStep(std::string _stepname, std::string _
return result;
}
std::vector<HTMLInfo*> ClassFlowControll::GetAllDigital()
std::string ClassFlowControll::TranslateAktstatus(std::string _input)
{
for (int i = 0; i < FlowControll.size(); ++i)
if (FlowControll[i]->name().compare("ClassFlowDigit") == 0)
return ((ClassFlowDigit*) (FlowControll[i]))->GetHTMLInfo();
if (_input.compare("ClassFlowMakeImage") == 0)
return ("Take Image");
if (_input.compare("ClassFlowAlignment") == 0)
return ("Aligning");
//if (_input.compare("ClassFlowAnalog") == 0)
// return ("Analog ROIs");
if (_input.compare("ClassFlowCNNGeneral") == 0)
return ("Digitalization of ROIs");
if (_input.compare("ClassFlowMQTT") == 0)
return ("Sending MQTT");
if (_input.compare("ClassFlowPostProcessing") == 0)
return ("Processing");
return "Unkown Status";
}
std::vector<HTMLInfo*> ClassFlowControll::GetAllDigital()
{
if (flowdigit)
{
printf("ClassFlowControll::GetAllDigital - flowdigit != NULL\n");
return flowdigit->GetHTMLInfo();
}
std::vector<HTMLInfo*> empty;
return empty;
@@ -63,14 +88,43 @@ std::vector<HTMLInfo*> ClassFlowControll::GetAllDigital()
std::vector<HTMLInfo*> ClassFlowControll::GetAllAnalog()
{
for (int i = 0; i < FlowControll.size(); ++i)
if (FlowControll[i]->name().compare("ClassFlowAnalog") == 0)
return ((ClassFlowAnalog*) (FlowControll[i]))->GetHTMLInfo();
if (flowanalog)
return flowanalog->GetHTMLInfo();
std::vector<HTMLInfo*> empty;
return empty;
}
t_CNNType ClassFlowControll::GetTypeDigital()
{
if (flowdigit)
return flowdigit->getCNNType();
return t_CNNType::None;
}
t_CNNType ClassFlowControll::GetTypeAnalog()
{
if (flowanalog)
return flowanalog->getCNNType();
return t_CNNType::None;
}
string ClassFlowControll::GetMQTTMainTopic()
{
for (int i = 0; i < FlowControll.size(); ++i)
if (FlowControll[i]->name().compare("ClassFlowMQTT") == 0)
return ((ClassFlowMQTT*) (FlowControll[i]))->GetMQTTMainTopic();
return "";
}
void ClassFlowControll::SetInitialParameter(void)
{
@@ -82,7 +136,7 @@ void ClassFlowControll::SetInitialParameter(void)
flowpostprocessing = NULL;
disabled = false;
aktRunNr = 0;
aktstatus = "Startup";
aktstatus = "Booting ...";
}
@@ -110,20 +164,20 @@ ClassFlow* ClassFlowControll::CreateClassFlow(std::string _type)
}
if (toUpper(_type).compare("[ANALOG]") == 0)
{
cfc = new ClassFlowAnalog(&FlowControll);
flowanalog = (ClassFlowAnalog*) cfc;
cfc = new ClassFlowCNNGeneral(flowalignment);
flowanalog = (ClassFlowCNNGeneral*) cfc;
}
if (toUpper(_type).compare(0, 7, "[DIGITS") == 0)
{
cfc = new ClassFlowDigit(&FlowControll);
flowdigit = (ClassFlowDigit*) cfc;
cfc = new ClassFlowCNNGeneral(flowalignment);
flowdigit = (ClassFlowCNNGeneral*) cfc;
}
if (toUpper(_type).compare("[MQTT]") == 0)
cfc = new ClassFlowMQTT(&FlowControll);
if (toUpper(_type).compare("[POSTPROCESSING]") == 0)
{
cfc = new ClassFlowPostProcessing(&FlowControll);
cfc = new ClassFlowPostProcessing(&FlowControll, flowanalog, flowdigit);
flowpostprocessing = (ClassFlowPostProcessing*) cfc;
}
@@ -168,14 +222,17 @@ void ClassFlowControll::InitFlow(std::string config)
cfc = CreateClassFlow(line);
if (cfc)
{
printf("Start ReadParameter\n");
printf("Start ReadParameter (%s)\n", line.c_str());
cfc->ReadParameter(pFile, line);
}
else
{
fgets(zw, 1024, pFile);
printf("%s", zw);
line = std::string(zw);
line = "";
if (fgets(zw, 1024, pFile) && !feof(pFile))
{
printf("Read: %s", zw);
line = std::string(zw);
}
}
}
@@ -183,8 +240,8 @@ void ClassFlowControll::InitFlow(std::string config)
}
std::string ClassFlowControll::getActStatus(){
return aktstatus;
std::string* ClassFlowControll::getActStatus(){
return &aktstatus;
}
void ClassFlowControll::doFlowMakeImageOnly(string time){
@@ -193,9 +250,9 @@ void ClassFlowControll::doFlowMakeImageOnly(string time){
for (int i = 0; i < FlowControll.size(); ++i)
{
if (FlowControll[i]->name() == "ClassFlowMakeImage") {
zw_time = gettimestring("%Y%m%d-%H%M%S");
aktstatus = zw_time + ": " + FlowControll[i]->name();
string zw = "FlowControll.doFlowMakeImageOnly - " + FlowControll[i]->name();
// zw_time = gettimestring("%Y%m%d-%H%M%S");
zw_time = gettimestring("%H:%M:%S");
aktstatus = TranslateAktstatus(FlowControll[i]->name()) + " (" + zw_time + ")";
FlowControll[i]->doFlow(time);
}
}
@@ -215,8 +272,11 @@ bool ClassFlowControll::doFlow(string time)
for (int i = 0; i < FlowControll.size(); ++i)
{
zw_time = gettimestring("%Y%m%d-%H%M%S");
aktstatus = zw_time + ": " + FlowControll[i]->name();
zw_time = gettimestring("%H:%M:%S");
aktstatus = TranslateAktstatus(FlowControll[i]->name()) + " (" + zw_time + ")";
// zw_time = gettimestring("%Y%m%d-%H%M%S");
// aktstatus = zw_time + ": " + FlowControll[i]->name();
string zw = "FlowControll.doFlow - " + FlowControll[i]->name();
@@ -225,7 +285,7 @@ bool ClassFlowControll::doFlow(string time)
if (!FlowControll[i]->doFlow(time)){
repeat++;
LogFile.WriteToFile("Fehler im vorheriger Schritt - wird zum " + to_string(repeat) + ". Mal wiederholt");
i = -1; // vorheriger Schritt muss wiederholt werden (vermutlich Bilder aufnehmen)
if (i) i -= 1; // vorheriger Schritt muss wiederholt werden (vermutlich Bilder aufnehmen)
result = false;
if (repeat > 5) {
LogFile.WriteToFile("Wiederholung 5x nicht erfolgreich --> reboot");
@@ -243,53 +303,49 @@ bool ClassFlowControll::doFlow(string time)
#endif
}
zw_time = gettimestring("%Y%m%d-%H%M%S");
aktstatus = zw_time + ": Flow is done";
zw_time = gettimestring("%H:%M:%S");
aktstatus = "Flow finished (" + zw_time + ")";
return result;
}
void ClassFlowControll::UpdateAktStatus(std::string _flow)
{
aktstatus = gettimestring("%Y%m%d-%H%M%S");
aktstatus = aktstatus + "\t" + std::to_string(aktRunNr) + "\t";
if (_flow == "ClassFlowMakeImage")
aktstatus = aktstatus + "Taking Raw Image";
else
if (_flow == "ClassFlowAlignment")
aktstatus = aktstatus + "Aligning Image";
}
string ClassFlowControll::getReadoutAll(int _type)
{
std::vector<NumberPost*> numbers = flowpostprocessing->GetNumbers();
std::string out = "";
for (int i = 0; i < numbers.size(); ++i)
if (flowpostprocessing)
{
out = out + numbers[i]->name + "\t";
switch (_type) {
case READOUT_TYPE_VALUE:
out = out + numbers[i]->ReturnValue;
break;
case READOUT_TYPE_PREVALUE:
out = out + numbers[i]->ReturnPreValue;
break;
case READOUT_TYPE_RAWVALUE:
out = out + numbers[i]->ReturnRawValue;
break;
case READOUT_TYPE_ERROR:
out = out + numbers[i]->ErrorMessageText;
break;
}
if (i < numbers.size()-1)
out = out + "\r\n";
}
std::vector<NumberPost*> *numbers = flowpostprocessing->GetNumbers();
// printf("OUT: %s", out.c_str());
for (int i = 0; i < (*numbers).size(); ++i)
{
out = out + (*numbers)[i]->name + "\t";
switch (_type) {
case READOUT_TYPE_VALUE:
out = out + (*numbers)[i]->ReturnValueNoError;
break;
case READOUT_TYPE_PREVALUE:
if (flowpostprocessing->PreValueUse)
{
if ((*numbers)[i]->PreValueOkay)
out = out + (*numbers)[i]->ReturnPreValue;
else
out = out + "PreValue too old";
}
else
out = out + "PreValue deactivated";
break;
case READOUT_TYPE_RAWVALUE:
out = out + (*numbers)[i]->ReturnRawValue;
break;
case READOUT_TYPE_ERROR:
out = out + (*numbers)[i]->ErrorMessageText;
break;
}
if (i < (*numbers).size()-1)
out = out + "\r\n";
}
// printf("OUT: %s", out.c_str());
}
return out;
}
@@ -328,7 +384,7 @@ string ClassFlowControll::GetPrevalue(std::string _number)
return std::string();
}
std::string ClassFlowControll::UpdatePrevalue(std::string _newvalue, std::string _numbers)
std::string ClassFlowControll::UpdatePrevalue(std::string _newvalue, std::string _numbers, bool _extern)
{
float zw;
char* p;
@@ -350,7 +406,7 @@ std::string ClassFlowControll::UpdatePrevalue(std::string _newvalue, std::string
if (flowpostprocessing)
{
flowpostprocessing->SetPreValue(zw, _numbers);
flowpostprocessing->SetPreValue(zw, _numbers, _extern);
return _newvalue;
}
@@ -442,6 +498,7 @@ bool ClassFlowControll::ReadParameter(FILE* pfile, string& aktparamgraph)
return true;
}
int ClassFlowControll::CleanTempFolder() {
const char* folderPath = "/sdcard/img_tmp";
@@ -497,53 +554,59 @@ esp_err_t ClassFlowControll::GetJPGStream(std::string _fn, httpd_req_t *req)
{
_send = flowalignment->ImageBasis;
}
if (_fn == "alg_roi.jpg")
else
{
CImageBasis* _imgzw = new CImageBasis(flowalignment->ImageBasis);
flowalignment->DrawRef(_imgzw);
if (flowdigit) flowdigit->DrawROI(_imgzw);
if (flowanalog) flowanalog->DrawROI(_imgzw);
_send = _imgzw;
Dodelete = true;
}
if (_fn == "alg_roi.jpg")
{
CImageBasis* _imgzw = new CImageBasis(flowalignment->ImageBasis);
flowalignment->DrawRef(_imgzw);
if (flowdigit) flowdigit->DrawROI(_imgzw);
if (flowanalog) flowanalog->DrawROI(_imgzw);
_send = _imgzw;
Dodelete = true;
}
else
{
std::vector<HTMLInfo*> htmlinfo;
htmlinfo = GetAllDigital();
for (int i = 0; i < htmlinfo.size(); ++i)
{
if (_fn == htmlinfo[i]->filename)
{
if (htmlinfo[i]->image)
_send = htmlinfo[i]->image;
}
if (_fn == htmlinfo[i]->filename_org)
{
if (htmlinfo[i]->image_org)
_send = htmlinfo[i]->image_org;
}
delete htmlinfo[i];
}
htmlinfo.clear();
std::vector<HTMLInfo*> htmlinfo;
htmlinfo = GetAllDigital();
for (int i = 0; i < htmlinfo.size(); ++i)
{
if (_fn == htmlinfo[i]->filename)
{
if (htmlinfo[i]->image)
_send = htmlinfo[i]->image;
}
if (_fn == htmlinfo[i]->filename_org)
{
if (htmlinfo[i]->image_org)
_send = htmlinfo[i]->image_org;
}
delete htmlinfo[i];
}
htmlinfo.clear();
if (!_send)
{
htmlinfo = GetAllAnalog();
for (int i = 0; i < htmlinfo.size(); ++i)
{
if (_fn == htmlinfo[i]->filename)
{
if (htmlinfo[i]->image)
_send = htmlinfo[i]->image;
}
if (_fn == htmlinfo[i]->filename_org)
{
if (htmlinfo[i]->image_org)
_send = htmlinfo[i]->image_org;
}
delete htmlinfo[i];
}
htmlinfo.clear();
htmlinfo = GetAllAnalog();
for (int i = 0; i < htmlinfo.size(); ++i)
{
if (_fn == htmlinfo[i]->filename)
{
if (htmlinfo[i]->image)
_send = htmlinfo[i]->image;
}
}
if (_fn == htmlinfo[i]->filename_org)
{
if (htmlinfo[i]->image_org)
_send = htmlinfo[i]->image_org;
}
delete htmlinfo[i];
}
htmlinfo.clear();
if (_send)
{

View File

@@ -1,14 +1,15 @@
#pragma once
#ifndef __FLOWCONTROLL__
#define __FLOWCONTROLL__
#include <string>
#include "ClassFlow.h"
#include "ClassFlowMakeImage.h"
#include "ClassFlowAlignment.h"
#include "ClassFlowDigit.h"
#include "ClassFlowAnalog.h"
#include "ClassFlowCNNGeneral.h"
#include "ClassFlowPostProcessing.h"
#include "ClassFlowMQTT.h"
#include "ClassFlowCNNGeneral.h"
#define READOUT_TYPE_VALUE 0
@@ -24,8 +25,9 @@ protected:
std::vector<ClassFlow*> FlowControll;
ClassFlowPostProcessing* flowpostprocessing;
ClassFlowAlignment* flowalignment;
ClassFlowAnalog* flowanalog;
ClassFlowDigit* flowdigit;
ClassFlowCNNGeneral* flowanalog;
ClassFlowCNNGeneral* flowdigit;
// ClassFlowDigit* flowdigit;
ClassFlowMakeImage* flowmakeimage;
ClassFlow* CreateClassFlow(std::string _type);
@@ -36,8 +38,6 @@ protected:
std::string aktstatus;
int aktRunNr;
void UpdateAktStatus(std::string _flow);
public:
void InitFlow(std::string config);
bool doFlow(string time);
@@ -45,10 +45,14 @@ public:
bool getStatusSetupModus(){return SetupModeActive;};
string getReadout(bool _rawvalue, bool _noerror);
string getReadoutAll(int _type);
string UpdatePrevalue(std::string _newvalue, std::string _numbers);
string UpdatePrevalue(std::string _newvalue, std::string _numbers, bool _extern);
string GetPrevalue(std::string _number = "");
bool ReadParameter(FILE* pfile, string& aktparamgraph);
string TranslateAktstatus(std::string _input);
string GetMQTTMainTopic();
esp_err_t GetJPGStream(std::string _fn, httpd_req_t *req);
esp_err_t SendRawJPG(httpd_req_t *req);
@@ -56,14 +60,19 @@ public:
bool isAutoStart(long &_intervall);
std::string getActStatus();
std::string* getActStatus();
std::vector<HTMLInfo*> GetAllDigital();
std::vector<HTMLInfo*> GetAllAnalog();
t_CNNType GetTypeDigital();
t_CNNType GetTypeAnalog();
int CleanTempFolder();
string name(){return "ClassFlowControll";};
};
#endif

View File

@@ -0,0 +1,52 @@
#ifndef __CLASSFLOWIMAGE_CLASS__
#define __CLASSFLOWIMAGE_CLASS__
#include "ClassFlowImage.h"
struct roi {
int posx, posy, deltax, deltay;
float result_float;
int result_klasse;
string name;
CImageBasis *image, *image_org;
};
struct general {
string name;
std::vector<roi*> ROI;
};
struct NumberPost {
float MaxRateValue;
bool useMaxRateValue;
bool ErrorMessage;
bool PreValueOkay;
bool AllowNegativeRates;
bool checkDigitIncreaseConsistency;
time_t lastvalue;
string timeStamp;
float FlowRateAct; // m3 / min
float PreValue; // letzter Wert, der gut ausgelesen wurde
float Value; // letzer ausgelesener Wert, inkl. Korrekturen
string ReturnRawValue; // Rohwert (mit N & führenden 0)
string ReturnValue; // korrigierter Rückgabewert, ggf. mit Fehlermeldung
string ReturnPreValue; // korrigierter Rückgabewert ohne Fehlermeldung
string ReturnValueNoError;
string ErrorMessageText; // Fehlermeldung bei Consistency Check
int AnzahlAnalog;
int AnzahlDigital;
int DecimalShift;
int DecimalShiftInitial;
int Nachkomma;
bool isExtendedResolution;
general *digit_roi;
general *analog_roi;
string name;
};
#endif

View File

@@ -1,420 +0,0 @@
#include "ClassFlowDigit.h"
//#include "CFindTemplate.h"
//#include "CTfLiteClass.h"
// #define OHNETFLITE
#ifndef OHNETFLITE
#include "CTfLiteClass.h"
#endif
// #include "bitmap_image.hpp"
#include "ClassLogFile.h"
static const char* TAG = "flow_digital";
void ClassFlowDigit::SetInitialParameter(void)
{
string cnnmodelfile = "";
modelxsize = 1;
modelysize = 1;
ListFlowControll = NULL;
previousElement = NULL;
SaveAllFiles = false;
disabled = false;
DecimalShift = 0;
DecimalShiftEnabled = false;
}
ClassFlowDigit::ClassFlowDigit() : ClassFlowImage(TAG)
{
SetInitialParameter();
}
ClassFlowDigit::ClassFlowDigit(std::vector<ClassFlow*>* lfc) : ClassFlowImage(lfc, TAG)
{
SetInitialParameter();
ListFlowControll = lfc;
for (int i = 0; i < ListFlowControll->size(); ++i)
{
if (((*ListFlowControll)[i])->name().compare("ClassFlowAlignment") == 0)
{
flowpostalignment = (ClassFlowAlignment*) (*ListFlowControll)[i];
}
}
}
ClassFlowDigit::ClassFlowDigit(std::vector<ClassFlow*>* lfc, ClassFlow *_prev) : ClassFlowImage(lfc, _prev, TAG)
{
SetInitialParameter();
ListFlowControll = lfc;
previousElement = _prev;
for (int i = 0; i < ListFlowControll->size(); ++i)
{
if (((*ListFlowControll)[i])->name().compare("ClassFlowAlignment") == 0)
{
flowpostalignment = (ClassFlowAlignment*) (*ListFlowControll)[i];
}
}
}
string ClassFlowDigit::getReadout(int _digit = 0)
{
string rst = "";
for (int i = 0; i < DIGIT[_digit]->ROI.size(); ++i)
{
if (DIGIT[_digit]->ROI[i]->resultklasse == 10)
rst = rst + "N";
else
rst = rst + std::to_string(DIGIT[_digit]->ROI[i]->resultklasse);
}
return rst;
}
bool ClassFlowDigit::ReadParameter(FILE* pfile, string& aktparamgraph)
{
std::vector<string> zerlegt;
aktparamgraph = trim(aktparamgraph);
if (aktparamgraph.size() == 0)
if (!this->GetNextParagraph(pfile, aktparamgraph))
return false;
printf("aktparamgraph: %s\n", aktparamgraph.c_str());
if ((aktparamgraph.compare(0, 7, "[Digits") != 0) && (aktparamgraph.compare(0, 8, ";[Digits") != 0)) // Paragraph passt nich zu MakeImage
return false;
int _pospkt = aktparamgraph.find_first_of(".");
int _posklammerzu = aktparamgraph.find_first_of("]");
if (_pospkt > -1)
NameDigit = aktparamgraph.substr(_pospkt+1, _posklammerzu - _pospkt-1);
else
NameDigit = "";
printf("Name Digit: %s\n", NameDigit.c_str());
if (aktparamgraph[0] == ';')
{
disabled = true;
while (getNextLine(pfile, &aktparamgraph) && !isNewParagraph(aktparamgraph));
printf("[Digits] is disabled !!!\n");
return true;
}
while (getNextLine(pfile, &aktparamgraph) && !isNewParagraph(aktparamgraph))
{
zerlegt = this->ZerlegeZeile(aktparamgraph);
if ((zerlegt[0] == "LogImageLocation") && (zerlegt.size() > 1))
{
LogImageLocation = "/sdcard" + zerlegt[1];
isLogImage = true;
}
if ((zerlegt[0] == "Model") && (zerlegt.size() > 1))
{
cnnmodelfile = zerlegt[1];
}
if ((zerlegt[0] == "ModelInputSize") && (zerlegt.size() > 2))
{
modelxsize = std::stoi(zerlegt[1]);
modelysize = std::stoi(zerlegt[2]);
}
if (zerlegt.size() >= 5)
{
digit* _digit = GetDIGIT(zerlegt[0], true);
roi* neuroi = _digit->ROI[_digit->ROI.size()-1];
neuroi->posx = std::stoi(zerlegt[1]);
neuroi->posy = std::stoi(zerlegt[2]);
neuroi->deltax = std::stoi(zerlegt[3]);
neuroi->deltay = std::stoi(zerlegt[4]);
neuroi->resultklasse = -1;
neuroi->image = NULL;
neuroi->image_org = NULL;
}
if ((toUpper(zerlegt[0]) == "SAVEALLFILES") && (zerlegt.size() > 1))
{
if (toUpper(zerlegt[1]) == "TRUE")
SaveAllFiles = true;
}
}
for (int _dig = 0; _dig < DIGIT.size(); ++_dig)
for (int i = 0; i < DIGIT[_dig]->ROI.size(); ++i)
{
DIGIT[_dig]->ROI[i]->image = new CImageBasis(modelxsize, modelysize, 3);
DIGIT[_dig]->ROI[i]->image_org = new CImageBasis(DIGIT[_dig]->ROI[i]->deltax, DIGIT[_dig]->ROI[i]->deltay, 3);
}
return true;
}
digit* ClassFlowDigit::FindDIGIT(string _name_number)
{
for (int i = 0; i < DIGIT.size(); ++i)
{
if (DIGIT[i]->name == _name_number)
return DIGIT[i];
}
return NULL;
}
digit* ClassFlowDigit::GetDIGIT(string _name, bool _create = true)
{
string _digit, _roi;
int _pospunkt = _name.find_first_of(".");
// printf("Name: %s, Pospunkt: %d\n", _name.c_str(), _pospunkt);
if (_pospunkt > -1)
{
_digit = _name.substr(0, _pospunkt);
_roi = _name.substr(_pospunkt+1, _name.length() - _pospunkt - 1);
}
else
{
_digit = "default";
_roi = _name;
}
digit *_ret = NULL;
for (int i = 0; i < DIGIT.size(); ++i)
{
if (DIGIT[i]->name == _digit)
_ret = DIGIT[i];
}
if (!_create) // nicht gefunden und soll auch nicht erzeugt werden, ggf. geht eine NULL zurück
return _ret;
if (_ret == NULL)
{
_ret = new digit;
_ret->name = _digit;
DIGIT.push_back(_ret);
}
roi* neuroi = new roi;
neuroi->name = _roi;
_ret->ROI.push_back(neuroi);
printf("GetDIGIT - digit %s - roi %s\n", _digit.c_str(), _roi.c_str());
return _ret;
}
string ClassFlowDigit::getHTMLSingleStep(string host)
{
string result, zw;
std::vector<HTMLInfo*> htmlinfo;
result = "<p>Found ROIs: </p> <p><img src=\"" + host + "/img_tmp/alg_roi.jpg\"></p>\n";
result = result + "Digital Counter: <p> ";
htmlinfo = GetHTMLInfo();
for (int i = 0; i < htmlinfo.size(); ++i)
{
if (htmlinfo[i]->val == 10)
zw = "NaN";
else
{
zw = to_string((int) htmlinfo[i]->val);
}
result = result + "<img src=\"" + host + "/img_tmp/" + htmlinfo[i]->filename + "\"> " + zw;
delete htmlinfo[i];
}
htmlinfo.clear();
return result;
}
bool ClassFlowDigit::doFlow(string time)
{
if (disabled)
return true;
if (!doAlignAndCut(time)){
return false;
};
doNeuralNetwork(time);
RemoveOldLogs();
return true;
}
bool ClassFlowDigit::doAlignAndCut(string time)
{
if (disabled)
return true;
CAlignAndCutImage *caic = flowpostalignment->GetAlignAndCutImage();
for (int _dig = 0; _dig < DIGIT.size(); ++_dig)
{
printf("DIGIT[_dig]->ROI.size() %d\n", DIGIT[_dig]->ROI.size());
for (int i = 0; i < DIGIT[_dig]->ROI.size(); ++i)
{
printf("DigitalDigit %d - Align&Cut\n", i);
caic->CutAndSave(DIGIT[_dig]->ROI[i]->posx, DIGIT[_dig]->ROI[i]->posy, DIGIT[_dig]->ROI[i]->deltax, DIGIT[_dig]->ROI[i]->deltay, DIGIT[_dig]->ROI[i]->image_org);
if (SaveAllFiles)
{
if (DIGIT[_dig]->name == "default")
DIGIT[_dig]->ROI[i]->image_org->SaveToFile(FormatFileName("/sdcard/img_tmp/" + DIGIT[_dig]->ROI[i]->name + ".jpg"));
else
DIGIT[_dig]->ROI[i]->image_org->SaveToFile(FormatFileName("/sdcard/img_tmp/" + DIGIT[_dig]->name + "_" + DIGIT[_dig]->ROI[i]->name + ".jpg"));
}
DIGIT[_dig]->ROI[i]->image_org->Resize(modelxsize, modelysize, DIGIT[_dig]->ROI[i]->image);
if (SaveAllFiles)
{
if (DIGIT[_dig]->name == "default")
DIGIT[_dig]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + DIGIT[_dig]->ROI[i]->name + ".bmp"));
else
DIGIT[_dig]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + DIGIT[_dig]->name + "_" + DIGIT[_dig]->ROI[i]->name + ".bmp"));
}
}
}
return true;
}
bool ClassFlowDigit::doNeuralNetwork(string time)
{
if (disabled)
return true;
string logPath = CreateLogFolder(time);
#ifndef OHNETFLITE
CTfLiteClass *tflite = new CTfLiteClass;
string zwcnn = FormatFileName("/sdcard" + cnnmodelfile);
printf(zwcnn.c_str());printf("\n");
if (!tflite->LoadModel(zwcnn)) {
printf("Can't read model file /sdcard%s\n", cnnmodelfile.c_str());
delete tflite;
return false;
}
tflite->MakeAllocate();
#endif
for (int _dig = 0; _dig < DIGIT.size(); ++_dig)
for (int i = 0; i < DIGIT[_dig]->ROI.size(); ++i)
{
printf("DigitalDigit %d - TfLite\n", i);
DIGIT[_dig]->ROI[i]->resultklasse = 0;
#ifndef OHNETFLITE
DIGIT[_dig]->ROI[i]->resultklasse = tflite->GetClassFromImageBasis(DIGIT[_dig]->ROI[i]->image);
#endif
printf("Result Digit%i: %d\n", i, DIGIT[_dig]->ROI[i]->resultklasse);
if (isLogImage)
{
LogImage(logPath, DIGIT[_dig]->ROI[i]->name, NULL, &DIGIT[_dig]->ROI[i]->resultklasse, time, DIGIT[_dig]->ROI[i]->image_org);
}
}
#ifndef OHNETFLITE
delete tflite;
#endif
return true;
}
void ClassFlowDigit::DrawROI(CImageBasis *_zw)
{
for (int _dig = 0; _dig < DIGIT.size(); ++_dig)
for (int i = 0; i < DIGIT[_dig]->ROI.size(); ++i)
_zw->drawRect(DIGIT[_dig]->ROI[i]->posx, DIGIT[_dig]->ROI[i]->posy, DIGIT[_dig]->ROI[i]->deltax, DIGIT[_dig]->ROI[i]->deltay, 0, 0, (255 - _dig*100), 2);
}
std::vector<HTMLInfo*> ClassFlowDigit::GetHTMLInfo()
{
std::vector<HTMLInfo*> result;
for (int _dig = 0; _dig < DIGIT.size(); ++_dig)
for (int i = 0; i < DIGIT[_dig]->ROI.size(); ++i)
{
if (DIGIT[_dig]->name == "default")
DIGIT[_dig]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + DIGIT[_dig]->ROI[i]->name + ".bmp"));
else
DIGIT[_dig]->ROI[i]->image->SaveToFile(FormatFileName("/sdcard/img_tmp/" + DIGIT[_dig]->name + "_" + DIGIT[_dig]->ROI[i]->name + ".bmp"));
HTMLInfo *zw = new HTMLInfo;
if (DIGIT[_dig]->name == "default")
{
zw->filename = DIGIT[_dig]->ROI[i]->name + ".bmp";
zw->filename_org = DIGIT[_dig]->ROI[i]->name + ".jpg";
}
else
{
zw->filename = DIGIT[_dig]->name + "_" + DIGIT[_dig]->ROI[i]->name + ".bmp";
zw->filename_org = DIGIT[_dig]->name + "_" + DIGIT[_dig]->ROI[i]->name + ".jpg";
}
zw->val = DIGIT[_dig]->ROI[i]->resultklasse;
zw->image = DIGIT[_dig]->ROI[i]->image;
zw->image_org = DIGIT[_dig]->ROI[i]->image_org;
result.push_back(zw);
}
return result;
}
int ClassFlowDigit::getAnzahlDIGIT()
{
return DIGIT.size();
}
string ClassFlowDigit::getNameDIGIT(int _digit)
{
if (_digit < DIGIT.size())
return DIGIT[_digit]->name;
return "DIGIT DOES NOT EXIST";
}
digit* ClassFlowDigit::GetDIGIT(int _digit)
{
if (_digit < DIGIT.size())
return DIGIT[_digit];
return NULL;
}
void ClassFlowDigit::UpdateNameNumbers(std::vector<std::string> *_name_numbers)
{
for (int _dig = 0; _dig < DIGIT.size(); _dig++)
{
std::string _name = DIGIT[_dig]->name;
bool found = false;
for (int i = 0; i < (*_name_numbers).size(); ++i)
{
if ((*_name_numbers)[i] == _name)
found = true;
}
if (!found)
(*_name_numbers).push_back(_name);
}
}

View File

@@ -1,68 +0,0 @@
#pragma once
#include "ClassFlowImage.h"
#include "ClassFlowAlignment.h"
#include "Helper.h"
#include <string>
struct roi {
int posx, posy, deltax, deltay;
int resultklasse;
string name;
CImageBasis *image, *image_org;
roi* next;
};
struct digit {
string name;
std::vector<roi*> ROI;
};
class ClassFlowDigit :
public ClassFlowImage
{
protected:
// std::vector<roi*> ROI;
std::vector<digit*> DIGIT;
string cnnmodelfile;
int modelxsize, modelysize;
bool SaveAllFiles;
string NameDigit;
int DecimalShift;
bool DecimalShiftEnabled;
ClassFlowAlignment* flowpostalignment;
bool doNeuralNetwork(string time);
bool doAlignAndCut(string time);
void SetInitialParameter(void);
public:
ClassFlowDigit();
ClassFlowDigit(std::vector<ClassFlow*>* lfc);
ClassFlowDigit(std::vector<ClassFlow*>* lfc, ClassFlow *_prev);
bool ReadParameter(FILE* pfile, string& aktparamgraph);
bool doFlow(string time);
string getHTMLSingleStep(string host);
string getReadout(int _digit);
std::vector<HTMLInfo*> GetHTMLInfo();
int getAnzahlDIGIT();
digit* GetDIGIT(int _digit);
digit* GetDIGIT(string _name, bool _create);
digit* FindDIGIT(string _name_number);
string getNameDIGIT(int _digit);
void UpdateNameNumbers(std::vector<std::string> *_name_numbers);
void DrawROI(CImageBasis *_zw);
string name(){return "ClassFlowDigit";};
};

View File

@@ -48,9 +48,14 @@ void ClassFlowImage::LogImage(string logPath, string name, float *resultFloat, i
if (!isLogImage)
return;
char buf[10];
if (resultFloat != NULL) {
sprintf(buf, "%.1f_", *resultFloat);
if (*resultFloat < 0)
sprintf(buf, "N.N_");
else
sprintf(buf, "%.1f_", *resultFloat);
} else if (resultInt != NULL) {
sprintf(buf, "%d_", *resultInt);
} else {

View File

@@ -122,6 +122,12 @@ bool ClassFlowMQTT::ReadParameter(FILE* pfile, string& aktparamgraph)
}
string ClassFlowMQTT::GetMQTTMainTopic()
{
return maintopic;
}
bool ClassFlowMQTT::doFlow(string zwtime)
{
if (!MQTTenable)
@@ -148,16 +154,16 @@ bool ClassFlowMQTT::doFlow(string zwtime)
if (flowpostprocessing)
{
std::vector<NumberPost*> NUMBERS = flowpostprocessing->GetNumbers();
std::vector<NumberPost*>* NUMBERS = flowpostprocessing->GetNumbers();
for (int i = 0; i < NUMBERS.size(); ++i)
for (int i = 0; i < (*NUMBERS).size(); ++i)
{
result = NUMBERS[i]->ReturnValueNoError;
resulterror = NUMBERS[i]->ErrorMessageText;
resultrate = std::to_string(NUMBERS[i]->FlowRateAct);
resulttimestamp = NUMBERS[i]->timeStamp;
result = (*NUMBERS)[i]->ReturnValueNoError;
resulterror = (*NUMBERS)[i]->ErrorMessageText;
resultrate = std::to_string((*NUMBERS)[i]->FlowRateAct);
resulttimestamp = (*NUMBERS)[i]->timeStamp;
namenumber = NUMBERS[i]->name;
namenumber = (*NUMBERS)[i]->name;
if (namenumber == "default")
namenumber = maintopic + "/";
else

View File

@@ -23,6 +23,8 @@ public:
ClassFlowMQTT(std::vector<ClassFlow*>* lfc);
ClassFlowMQTT(std::vector<ClassFlow*>* lfc, ClassFlow *_prev);
string GetMQTTMainTopic();
bool ReadParameter(FILE* pfile, string& aktparamgraph);
bool doFlow(string time);
string name(){return "ClassFlowMQTT";};

View File

@@ -1,5 +1,4 @@
#include "ClassFlowPostProcessing.h"
#include "Helper.h"
#include "ClassFlowMakeImage.h"
#include "ClassLogFile.h"
@@ -28,21 +27,27 @@ string ClassFlowPostProcessing::GetPreValue(std::string _number)
if (NUMBERS[i]->name == _number)
index = i;
// result = RundeOutput(NUMBERS[index]->PreValue, -NUMBERS[index]->DecimalShift);
result = RundeOutput(NUMBERS[index]->PreValue, NUMBERS[index]->Nachkomma);
// if (NUMBERS[index]->digit_roi && NUMBERS[index]->analog_roi)
// result = RundeOutput(NUMBERS[index]->PreValue, NUMBERS[index]->AnzahlAnalog - NUMBERS[index]->DecimalShift);
return result;
}
void ClassFlowPostProcessing::SetPreValue(float zw, string _numbers)
void ClassFlowPostProcessing::SetPreValue(float zw, string _numbers, bool _extern)
{
printf("SetPrevalue: %f, %s\n", zw, _numbers.c_str());
for (int j = 0; j < NUMBERS.size(); ++j)
{
// printf("Number %d, %s\n", j, NUMBERS[j]->name.c_str());
if (NUMBERS[j]->name == _numbers)
{
NUMBERS[j]->PreValue = zw;
if (_extern)
{
time(&(NUMBERS[j]->lastvalue));
localtime(&(NUMBERS[j]->lastvalue));
}
// printf("Found %d! - set to %f\n", j, NUMBERS[j]->PreValue);
}
}
UpdatePreValueINI = true;
SavePreValue();
@@ -118,7 +123,7 @@ bool ClassFlowPostProcessing::LoadPreValue(void)
if (NUMBERS[j]->digit_roi || NUMBERS[j]->analog_roi)
{
NUMBERS[j]->ReturnValue = RundeOutput(NUMBERS[j]->Value, NUMBERS[j]->AnzahlAnalog - NUMBERS[j]->DecimalShift);
NUMBERS[j]->ReturnValue = RundeOutput(NUMBERS[j]->Value, NUMBERS[j]->Nachkomma);
NUMBERS[j]->ReturnValueNoError = NUMBERS[j]->ReturnValue;
}
}
@@ -180,7 +185,7 @@ bool ClassFlowPostProcessing::LoadPreValue(void)
if (NUMBERS[0]->digit_roi || NUMBERS[0]->analog_roi)
{
NUMBERS[0]->ReturnValue = RundeOutput(NUMBERS[0]->Value, NUMBERS[0]->AnzahlAnalog - NUMBERS[0]->DecimalShift);
NUMBERS[0]->ReturnValue = RundeOutput(NUMBERS[0]->Value, NUMBERS[0]->Nachkomma);
NUMBERS[0]->ReturnValueNoError = NUMBERS[0]->ReturnValue;
}
@@ -207,8 +212,9 @@ void ClassFlowPostProcessing::SavePreValue()
struct tm* timeinfo = localtime(&NUMBERS[j]->lastvalue);
strftime(buffer, 80, PREVALUE_TIME_FORMAT_OUTPUT, timeinfo);
NUMBERS[j]->timeStamp = std::string(buffer);
// printf("SaverPreValue %d, Value: %f, Nachkomma %d\n", j, NUMBERS[j]->PreValue, NUMBERS[j]->Nachkomma);
_zw = NUMBERS[j]->name + "\t" + NUMBERS[j]->timeStamp + "\t" + RundeOutput(NUMBERS[j]->Value, NUMBERS[j]->Nachkomma) + "\n";
_zw = NUMBERS[j]->name + "\t" + NUMBERS[j]->timeStamp + "\t" + RundeOutput(NUMBERS[j]->PreValue, NUMBERS[j]->Nachkomma) + "\n";
printf("Write PreValue Zeile: %s\n", _zw.c_str());
fputs(_zw.c_str(), pFile);
@@ -220,21 +226,19 @@ void ClassFlowPostProcessing::SavePreValue()
}
ClassFlowPostProcessing::ClassFlowPostProcessing(std::vector<ClassFlow*>* lfc)
ClassFlowPostProcessing::ClassFlowPostProcessing(std::vector<ClassFlow*>* lfc, ClassFlowCNNGeneral *_analog, ClassFlowCNNGeneral *_digit)
{
// FlowRateAct = 0;
PreValueUse = false;
PreValueAgeStartup = 30;
ErrorMessage = false;
ListFlowControll = NULL;
// PreValueOkay = false;
// DecimalShift = 0;
// ErrorMessageText = "";
// timeStamp = "";
FilePreValue = FormatFileName("/sdcard/config/prevalue.ini");
ListFlowControll = lfc;
flowMakeImage = NULL;
UpdatePreValueINI = false;
IgnoreLeadingNaN = false;
flowAnalog = _analog;
flowDigit = _digit;
for (int i = 0; i < ListFlowControll->size(); ++i)
{
@@ -245,6 +249,36 @@ ClassFlowPostProcessing::ClassFlowPostProcessing(std::vector<ClassFlow*>* lfc)
}
}
void ClassFlowPostProcessing::handleDecimalExtendedResolution(string _decsep, string _value)
{
string _digit, _decpos;
int _pospunkt = _decsep.find_first_of(".");
// printf("Name: %s, Pospunkt: %d\n", _decsep.c_str(), _pospunkt);
if (_pospunkt > -1)
_digit = _decsep.substr(0, _pospunkt);
else
_digit = "default";
for (int j = 0; j < NUMBERS.size(); ++j)
{
bool _zwdc = false;
if (toUpper(_value) == "TRUE")
_zwdc = true;
if (_digit == "default") // erstmal auf default setzen (falls sonst nichts gesetzt)
{
NUMBERS[j]->isExtendedResolution = _zwdc;
}
if (NUMBERS[j]->name == _digit)
{
NUMBERS[j]->isExtendedResolution = _zwdc;
}
}
}
void ClassFlowPostProcessing::handleDecimalSeparator(string _decsep, string _value)
{
string _digit, _decpos;
@@ -259,20 +293,26 @@ void ClassFlowPostProcessing::handleDecimalSeparator(string _decsep, string _val
{
int _zwdc = 0;
try
// try
{
_zwdc = stoi(_value);
}
catch(const std::exception& e)
/* catch(const std::exception& e)
{
printf("ERROR - Decimalshift is not a number: %s\n", _value.c_str());
}
*/
if (_digit == "default") // erstmal auf default setzen (falls sonst nichts gesetzt)
{
NUMBERS[j]->DecimalShift = _zwdc;
NUMBERS[j]->DecimalShiftInitial = _zwdc;
}
if (NUMBERS[j]->name == _digit)
{
NUMBERS[j]->DecimalShift = _zwdc;
NUMBERS[j]->DecimalShiftInitial = _zwdc;
}
NUMBERS[j]->Nachkomma = NUMBERS[j]->AnzahlAnalog - NUMBERS[j]->DecimalShift;
}
@@ -292,15 +332,15 @@ void ClassFlowPostProcessing::handleMaxRateValue(string _decsep, string _value)
{
float _zwdc = 1;
try
// try
{
_zwdc = stof(_value);
}
catch(const std::exception& e)
/* catch(const std::exception& e)
{
printf("ERROR - MaxRateValue is not a number: %s\n", _value.c_str());
}
*/
if (_digit == "default") // erstmal auf default setzen (falls sonst nichts gesetzt)
{
@@ -340,6 +380,11 @@ bool ClassFlowPostProcessing::ReadParameter(FILE* pfile, string& aktparamgraph)
zerlegt = this->ZerlegeZeile(aktparamgraph);
std::string _param = GetParameterName(zerlegt[0]);
if ((toUpper(_param) == "EXTENDEDRESOLUTION") && (zerlegt.size() > 1))
{
handleDecimalExtendedResolution(zerlegt[0], zerlegt[1]);
}
if ((toUpper(_param) == "DECIMALSHIFT") && (zerlegt.size() > 1))
{
handleDecimalSeparator(zerlegt[0], zerlegt[1]);
@@ -373,6 +418,13 @@ bool ClassFlowPostProcessing::ReadParameter(FILE* pfile, string& aktparamgraph)
if (toUpper(zerlegt[1]) == "TRUE")
ErrorMessage = true;
}
if ((toUpper(_param) == "IGNORELEADINGNAN") && (zerlegt.size() > 1))
{
if (toUpper(zerlegt[1]) == "TRUE")
IgnoreLeadingNaN = true;
}
if ((toUpper(_param) == "PREVALUEAGESTARTUP") && (zerlegt.size() > 1))
{
PreValueAgeStartup = std::stoi(zerlegt[1]);
@@ -388,33 +440,20 @@ bool ClassFlowPostProcessing::ReadParameter(FILE* pfile, string& aktparamgraph)
void ClassFlowPostProcessing::InitNUMBERS()
{
// ClassFlowDigit* _cdigit = NULL;
// ClassFlowAnalog* _canalog = NULL;
int anzDIGIT = 0;
int anzANALOG = 0;
std::vector<std::string> name_numbers;
flowAnalog = NULL;
flowDigit = NULL;
for (int i = 0; i < ListFlowControll->size(); ++i)
{
if (((*ListFlowControll)[i])->name().compare("ClassFlowDigit") == 0)
{
flowDigit = (ClassFlowDigit*) (*ListFlowControll)[i];
anzDIGIT = flowDigit->getAnzahlDIGIT();
}
if (((*ListFlowControll)[i])->name().compare("ClassFlowAnalog") == 0)
{
flowAnalog = (ClassFlowAnalog*)(*ListFlowControll)[i];
anzANALOG = flowAnalog->getAnzahlANALOG();
}
}
if (flowDigit)
{
anzDIGIT = flowDigit->getAnzahlGENERAL();
flowDigit->UpdateNameNumbers(&name_numbers);
}
if (flowAnalog)
{
anzANALOG = flowAnalog->getAnzahlGENERAL();
flowAnalog->UpdateNameNumbers(&name_numbers);
}
printf("Anzahl NUMBERS: %d - DIGITS: %d, ANALOG: %d\n", name_numbers.size(), anzDIGIT, anzANALOG);
@@ -426,7 +465,7 @@ void ClassFlowPostProcessing::InitNUMBERS()
_number->digit_roi = NULL;
if (flowDigit)
_number->digit_roi = flowDigit->FindDIGIT(name_numbers[_num]);
_number->digit_roi = flowDigit->FindGENERAL(name_numbers[_num]);
if (_number->digit_roi)
_number->AnzahlDigital = _number->digit_roi->ROI.size();
@@ -435,7 +474,7 @@ void ClassFlowPostProcessing::InitNUMBERS()
_number->analog_roi = NULL;
if (flowAnalog)
_number->analog_roi = flowAnalog->FindANALOG(name_numbers[_num]);
_number->analog_roi = flowAnalog->FindGENERAL(name_numbers[_num]);
if (_number->analog_roi)
@@ -456,6 +495,9 @@ void ClassFlowPostProcessing::InitNUMBERS()
_number->PreValueOkay = false;
_number->useMaxRateValue = false;
_number->DecimalShift = 0;
_number->DecimalShiftInitial = 0;
_number->isExtendedResolution = false;
_number->FlowRateAct = 0; // m3 / min
_number->PreValue = 0; // letzter Wert, der gut ausgelesen wurde
@@ -528,6 +570,10 @@ bool ClassFlowPostProcessing::doFlow(string zwtime)
// ErrorMessageText = "";
// Update Nachkomma, da sich beim Wechsel von CNNType Auto --> xyz auch die Nachkommastellen ändern können:
imagetime = flowMakeImage->getTimeImageTaken();
if (imagetime == 0)
time(&imagetime);
@@ -545,14 +591,30 @@ bool ClassFlowPostProcessing::doFlow(string zwtime)
NUMBERS[j]->ReturnRawValue = "";
NUMBERS[j]->ErrorMessageText = "";
UpdateNachkommaDecimalShift();
if (NUMBERS[j]->digit_roi)
NUMBERS[j]->ReturnRawValue = flowDigit->getReadout(j);
{
if (NUMBERS[j]->analog_roi)
NUMBERS[j]->ReturnRawValue = flowDigit->getReadout(j, false);
else
NUMBERS[j]->ReturnRawValue = flowDigit->getReadout(j, NUMBERS[j]->isExtendedResolution); // Extended Resolution nur falls es keine analogen Ziffern gibt
}
if (NUMBERS[j]->digit_roi && NUMBERS[j]->analog_roi)
NUMBERS[j]->ReturnRawValue = NUMBERS[j]->ReturnRawValue + ".";
if (NUMBERS[j]->analog_roi)
NUMBERS[j]->ReturnRawValue = NUMBERS[j]->ReturnRawValue + flowAnalog->getReadout(j);
NUMBERS[j]->ReturnRawValue = NUMBERS[j]->ReturnRawValue + flowAnalog->getReadout(j, NUMBERS[j]->isExtendedResolution);
NUMBERS[j]->ReturnRawValue = ShiftDecimal(NUMBERS[j]->ReturnRawValue, NUMBERS[j]->DecimalShift);
NUMBERS[j]->ReturnRawValue = ShiftDecimal(NUMBERS[j]->ReturnRawValue, NUMBERS[j]->DecimalShift);
if (IgnoreLeadingNaN)
{
while ((NUMBERS[j]->ReturnRawValue.length() > 1) && (NUMBERS[j]->ReturnRawValue[0] == 'N'))
{
NUMBERS[j]->ReturnRawValue.erase(0, 1);
}
}
rohwert = NUMBERS[j]->ReturnRawValue;
@@ -590,13 +652,13 @@ bool ClassFlowPostProcessing::doFlow(string zwtime)
NUMBERS[j]->Value = checkDigitConsistency(NUMBERS[j]->Value, NUMBERS[j]->DecimalShift, NUMBERS[j]->analog_roi != NULL, NUMBERS[j]->PreValue);
}
zwvalue = RundeOutput(NUMBERS[j]->Value, NUMBERS[j]->AnzahlAnalog - NUMBERS[j]->DecimalShift);
zwvalue = RundeOutput(NUMBERS[j]->Value, NUMBERS[j]->Nachkomma);
if ((!NUMBERS[j]->AllowNegativeRates) && (NUMBERS[j]->Value < NUMBERS[j]->PreValue))
{
NUMBERS[j]->ErrorMessageText = NUMBERS[j]->ErrorMessageText + "Neg. Rate - Read: " + zwvalue + " - Raw: " + NUMBERS[j]->ReturnRawValue + " - Pre: " + RundeOutput(NUMBERS[j]->PreValue, NUMBERS[j]->Nachkomma) + " ";
NUMBERS[j]->Value = NUMBERS[j]->PreValue;
zwvalue = RundeOutput(NUMBERS[j]->Value, NUMBERS[j]->AnzahlAnalog - NUMBERS[j]->DecimalShift);
zwvalue = RundeOutput(NUMBERS[j]->Value, NUMBERS[j]->Nachkomma);
}
if (NUMBERS[j]->useMaxRateValue && (abs(NUMBERS[j]->Value - NUMBERS[j]->PreValue) > NUMBERS[j]->MaxRateValue))
@@ -631,6 +693,49 @@ bool ClassFlowPostProcessing::doFlow(string zwtime)
return true;
}
void ClassFlowPostProcessing::UpdateNachkommaDecimalShift()
{
for (int j = 0; j < NUMBERS.size(); ++j)
{
if (NUMBERS[j]->digit_roi && !NUMBERS[j]->analog_roi) // es gibt nur digitale ziffern
{
// printf("Nurdigital\n");
NUMBERS[j]->DecimalShift = NUMBERS[j]->DecimalShiftInitial;
if (NUMBERS[j]->isExtendedResolution && flowDigit->isExtendedResolution()) // extended resolution ist an und soll auch bei dieser Ziffer verwendet werden
NUMBERS[j]->DecimalShift = NUMBERS[j]->DecimalShift-1;
NUMBERS[j]->Nachkomma = -NUMBERS[j]->DecimalShift;
}
if (!NUMBERS[j]->digit_roi && NUMBERS[j]->analog_roi) // es gibt nur analoge ziffern
{
// printf("Nur analog\n");
NUMBERS[j]->DecimalShift = NUMBERS[j]->DecimalShiftInitial;
if (NUMBERS[j]->isExtendedResolution && flowAnalog->isExtendedResolution()) // extended resolution ist an und soll auch bei dieser Ziffer verwendet werden
NUMBERS[j]->DecimalShift = NUMBERS[j]->DecimalShift-1;
NUMBERS[j]->Nachkomma = -NUMBERS[j]->DecimalShift;
}
if (NUMBERS[j]->digit_roi && NUMBERS[j]->analog_roi) // digital + analog
{
// printf("Nur digital + analog\n");
NUMBERS[j]->Nachkomma = NUMBERS[j]->analog_roi->ROI.size();
NUMBERS[j]->DecimalShift = NUMBERS[j]->DecimalShiftInitial;
if (NUMBERS[j]->isExtendedResolution && flowAnalog->isExtendedResolution()) // extended resolution ist an und soll auch bei dieser Ziffer verwendet werden
NUMBERS[j]->Nachkomma = NUMBERS[j]->Nachkomma+1;
}
printf("UpdateNachkommaDecShift NUMBER%i: Nachkomma %i, DecShift %i\n", j, NUMBERS[j]->Nachkomma,NUMBERS[j]->DecimalShift);
}
}
string ClassFlowPostProcessing::getReadout(int _number)
{
return NUMBERS[_number]->ReturnValue;

View File

@@ -1,49 +1,13 @@
#pragma once
#ifndef __FLOWPOSTPROCESSING__
#define __FLOWPOSTPROCESSING__
#include "ClassFlow.h"
#include "ClassFlowMakeImage.h"
#include "ClassFlowAnalog.h"
#include "ClassFlowDigit.h"
#include "ClassFlowCNNGeneral.h"
#include "ClassFlowDefineTypes.h"
#include <string>
struct NumberPost {
// int PreValueAgeStartup;
float MaxRateValue;
bool useMaxRateValue;
bool ErrorMessage;
bool PreValueOkay;
bool AllowNegativeRates;
bool checkDigitIncreaseConsistency;
time_t lastvalue;
string timeStamp;
float FlowRateAct; // m3 / min
float PreValue; // letzter Wert, der gut ausgelesen wurde
float Value; // letzer ausgelesener Wert, inkl. Korrekturen
string ReturnRawValue; // Rohwert (mit N & führenden 0)
string ReturnValue; // korrigierter Rückgabewert, ggf. mit Fehlermeldung
string ReturnPreValue; // korrigierter Rückgabewert ohne Fehlermeldung
string ReturnValueNoError;
string ErrorMessageText; // Fehlermeldung bei Consistency Check
int AnzahlAnalog;
int AnzahlDigital;
int DecimalShift;
int Nachkomma;
// ClassFlowAnalog* ANALOG;
// ClassFlowDigit* DIGIT;
digit *digit_roi;
analog *analog_roi;
string name;
};
class ClassFlowPostProcessing :
public ClassFlow
{
@@ -51,13 +15,13 @@ protected:
std::vector<NumberPost*> NUMBERS;
bool UpdatePreValueINI;
bool PreValueUse;
int PreValueAgeStartup;
bool ErrorMessage;
bool IgnoreLeadingNaN; // SPEZIALFALL für User Gustl
ClassFlowAnalog* flowAnalog;
ClassFlowDigit* flowDigit;
ClassFlowCNNGeneral* flowAnalog;
ClassFlowCNNGeneral* flowDigit;
string FilePreValue;
@@ -74,10 +38,14 @@ protected:
void InitNUMBERS();
void handleDecimalSeparator(string _decsep, string _value);
void handleMaxRateValue(string _decsep, string _value);
void handleDecimalExtendedResolution(string _decsep, string _value);
public:
ClassFlowPostProcessing(std::vector<ClassFlow*>* lfc);
bool PreValueUse;
ClassFlowPostProcessing(std::vector<ClassFlow*>* lfc, ClassFlowCNNGeneral *_analog, ClassFlowCNNGeneral *_digit);
bool ReadParameter(FILE* pfile, string& aktparamgraph);
bool doFlow(string time);
string getReadout(int _number);
@@ -87,9 +55,14 @@ public:
string getReadoutTimeStamp(int _number = 0);
void SavePreValue();
string GetPreValue(std::string _number = "");
void SetPreValue(float zw, string _numbers);
std::vector<NumberPost*> GetNumbers(){return NUMBERS;};
void SetPreValue(float zw, string _numbers, bool _extern = false);
void UpdateNachkommaDecimalShift();
std::vector<NumberPost*>* GetNumbers(){return &NUMBERS;};
string name(){return "ClassFlowPostProcessing";};
};
#endif

View File

@@ -83,17 +83,20 @@ FILE* OpenFileAndWait(const char* nm, const char* _mode, int _waitsec)
printf("open config file %s in mode %s\n", nm, _mode);
FILE *pfile = fopen(nm, _mode);
/*
if (pfile == NULL)
{
TickType_t xDelay;
xDelay = _waitsec * 1000 / portTICK_PERIOD_MS;
std::string zw = "File is locked: " + std::string(nm) + " - wait for " + std::to_string(_waitsec);
std::string zw = "File is locked: " + std::string(nm) + " - wait for " + std::to_string(_waitsec) + " seconds";
printf(zw.c_str());
printf("\n");
LogFile.WriteToFile(zw);
vTaskDelay( xDelay );
pfile = fopen(nm, _mode);
}
*/
return pfile;
}

View File

@@ -17,6 +17,7 @@ float CTfLiteClass::GetOutputValue(int nr)
return output2->data.f[nr];
}
int CTfLiteClass::GetClassFromImageBasis(CImageBasis *rs)
{
if (!LoadInputImageBasis(rs))
@@ -27,19 +28,36 @@ int CTfLiteClass::GetClassFromImageBasis(CImageBasis *rs)
return GetOutClassification();
}
int CTfLiteClass::GetOutClassification()
int CTfLiteClass::GetOutClassification(int _von, int _bis)
{
TfLiteTensor* output2 = interpreter->output(0);
float zw_max = 0;
float zw_max;
float zw;
int zw_class = -1;
int zw_class;
if (output2 == NULL)
return -1;
int numeroutput = output2->dims->data[1];
for (int i = 0; i < numeroutput; ++i)
//printf("\n number output neurons: %d\n\n", numeroutput);
if (_bis == -1)
_bis = numeroutput;
if (_von == -1)
_von = 0;
if (_bis > numeroutput)
{
printf("ANZAHL OUTPUT NEURONS passt nicht zu geforderter Classifizierung!");
return -1;
}
zw_max = output2->data.f[_von];
zw_class = _von;
for (int i = _von+1; i <= _bis; ++i)
{
zw = output2->data.f[i];
if (zw > zw_max)
@@ -48,7 +66,7 @@ int CTfLiteClass::GetOutClassification()
zw_class = i;
}
}
return zw_class;
return (zw_class - _von);
}
void CTfLiteClass::GetInputDimension(bool silent = false)
@@ -70,18 +88,18 @@ void CTfLiteClass::GetInputDimension(bool silent = false)
}
void CTfLiteClass::GetOutPut()
int CTfLiteClass::GetAnzOutPut(bool silent)
{
TfLiteTensor* output2 = this->interpreter->output(0);
int numdim = output2->dims->size;
printf("NumDimension: %d\n", numdim);
if (!silent) printf("NumDimension: %d\n", numdim);
int sizeofdim;
for (int j = 0; j < numdim; ++j)
{
sizeofdim = output2->dims->data[j];
printf("SizeOfDimension %d: %d\n", j, sizeofdim);
if (!silent) printf("SizeOfDimension %d: %d\n", j, sizeofdim);
}
@@ -92,8 +110,9 @@ void CTfLiteClass::GetOutPut()
for (int i = 0; i < numeroutput; ++i)
{
fo = output2->data.f[i];
printf("Result %d: %f\n", i, fo);
if (!silent) printf("Result %d: %f\n", i, fo);
}
return numeroutput;
}
void CTfLiteClass::Invoke()
@@ -106,7 +125,7 @@ void CTfLiteClass::Invoke()
bool CTfLiteClass::LoadInputImageBasis(CImageBasis *rs)
{
std::string zw = "ClassFlowAnalog::doNeuralNetwork nach LoadInputResizeImage: ";
std::string zw = "ClassFlowCNNGeneral::doNeuralNetwork nach LoadInputResizeImage: ";
unsigned int w = rs->width;
unsigned int h = rs->height;
@@ -149,6 +168,8 @@ void CTfLiteClass::MakeAllocate()
TfLiteStatus allocate_status = this->interpreter->AllocateTensors();
if (allocate_status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter, "AllocateTensors() failed");
LogFile.WriteToFile("AllocateTensors() failed");
this->GetInputDimension();
return;
}
@@ -236,7 +257,7 @@ CTfLiteClass::CTfLiteClass()
this->interpreter = nullptr;
this->input = nullptr;
this->output = nullptr;
this->kTensorArenaSize = 200 * 1024; /// laut testfile: 108000 - bisher 600
this->kTensorArenaSize = 800 * 1024; /// laut testfile: 108000 - bisher 600;; 2021-09-11: 200 * 1024
this->tensor_arena = new uint8_t[kTensorArenaSize];
}

View File

@@ -61,9 +61,13 @@ class CTfLiteClass
void GetInputTensorSize();
bool LoadInputImageBasis(CImageBasis *rs);
void Invoke();
void GetOutPut();
int GetOutClassification();
int GetAnzOutPut(bool silent = true);
// void GetOutPut();
// int GetOutClassification();
int GetOutClassification(int _von = -1, int _bis = -1);
int GetClassFromImageBasis(CImageBasis *rs);
std::string GetStatusFlow();
float GetOutputValue(int nr);
void GetInputDimension(bool silent);

View File

@@ -28,9 +28,6 @@ ClassFlowControll tfliteflow;
TaskHandle_t xHandleblink_task_doFlow = NULL;
TaskHandle_t xHandletask_autodoFlow = NULL;
bool flowisrunning = false;
long auto_intervall = 0;
@@ -283,37 +280,48 @@ esp_err_t handler_wasserzaehler(httpd_req_t *req)
txt = txt + "Digital Counter: <p> ";
httpd_resp_sendstr_chunk(req, txt.c_str());
std::vector<HTMLInfo*> htmlinfo;
htmlinfo = tfliteflow.GetAllDigital();
for (int i = 0; i < htmlinfo.size(); ++i)
std::vector<HTMLInfo*> htmlinfodig;
htmlinfodig = tfliteflow.GetAllDigital();
for (int i = 0; i < htmlinfodig.size(); ++i)
{
if (htmlinfo[i]->val == 10)
zw = "NaN";
if (tfliteflow.GetTypeDigital() == Digital)
{
if (htmlinfodig[i]->val == 10)
zw = "NaN";
else
zw = to_string((int) htmlinfodig[i]->val);
txt = "<img src=\"/img_tmp/" + htmlinfodig[i]->filename + "\"> " + zw;
}
else
{
zw = to_string((int) htmlinfo[i]->val);
std::stringstream stream;
stream << std::fixed << std::setprecision(1) << htmlinfodig[i]->val;
zw = stream.str();
txt = "<img src=\"/img_tmp/" + htmlinfodig[i]->filename + "\"> " + zw;
}
txt = "<img src=\"/img_tmp/" + htmlinfo[i]->filename + "\"> " + zw;
httpd_resp_sendstr_chunk(req, txt.c_str());
delete htmlinfo[i];
delete htmlinfodig[i];
}
htmlinfo.clear();
htmlinfodig.clear();
txt = " <p> Analog Meter: <p> ";
httpd_resp_sendstr_chunk(req, txt.c_str());
htmlinfo = tfliteflow.GetAllAnalog();
for (int i = 0; i < htmlinfo.size(); ++i)
std::vector<HTMLInfo*> htmlinfoana;
htmlinfoana = tfliteflow.GetAllAnalog();
for (int i = 0; i < htmlinfoana.size(); ++i)
{
std::stringstream stream;
stream << std::fixed << std::setprecision(1) << htmlinfo[i]->val;
stream << std::fixed << std::setprecision(1) << htmlinfoana[i]->val;
zw = stream.str();
txt = "<img src=\"/img_tmp/" + htmlinfo[i]->filename + "\"> " + zw;
txt = "<img src=\"/img_tmp/" + htmlinfoana[i]->filename + "\"> " + zw;
httpd_resp_sendstr_chunk(req, txt.c_str());
delete htmlinfo[i];
delete htmlinfoana[i];
}
htmlinfo.clear();
htmlinfoana.clear();
}
@@ -493,31 +501,7 @@ esp_err_t handler_editflow(httpd_req_t *req)
// string zwzw = "Do " + _task + " start\n"; printf(zwzw.c_str());
std::string zw = tfliteflow.doSingleStep("[Alignment]", _host);
httpd_resp_sendstr_chunk(req, zw.c_str());
}
if (_task.compare("test_analog") == 0)
{
std::string _host = "";
if (httpd_query_key_value(_query, "host", _valuechar, 30) == ESP_OK) {
_host = std::string(_valuechar);
}
// printf("Parameter host: "); printf(_host.c_str()); printf("\n");
// string zwzw = "Do " + _task + " start\n"; printf(zwzw.c_str());
std::string zw = tfliteflow.doSingleStep("[Analog]", _host);
httpd_resp_sendstr_chunk(req, zw.c_str());
}
if (_task.compare("test_digits") == 0)
{
std::string _host = "";
if (httpd_query_key_value(_query, "host", _valuechar, 30) == ESP_OK) {
_host = std::string(_valuechar);
}
// printf("Parameter host: "); printf(_host.c_str()); printf("\n");
// string zwzw = "Do " + _task + " start\n"; printf(zwzw.c_str());
std::string zw = tfliteflow.doSingleStep("[Digits]", _host);
httpd_resp_sendstr_chunk(req, zw.c_str());
}
}
/* Respond with an empty chunk to signal HTTP response completion */
httpd_resp_sendstr_chunk(req, NULL);
@@ -530,6 +514,34 @@ esp_err_t handler_editflow(httpd_req_t *req)
};
esp_err_t handler_statusflow(httpd_req_t *req)
{
#ifdef DEBUG_DETAIL_ON
LogFile.WriteHeapInfo("handler_prevalue - Start");
#endif
const char* resp_str;
#ifdef DEBUG_DETAIL_ON
printf("handler_prevalue:\n"); printf(req->uri); printf("\n");
#endif
string* zw = tfliteflow.getActStatus();
resp_str = zw->c_str();
httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*");
httpd_resp_send(req, resp_str, strlen(resp_str));
/* Respond with an empty chunk to signal HTTP response completion */
httpd_resp_send_chunk(req, NULL, 0);
#ifdef DEBUG_DETAIL_ON
LogFile.WriteHeapInfo("handler_prevalue - Start");
#endif
return ESP_OK;
};
esp_err_t handler_prevalue(httpd_req_t *req)
{
#ifdef DEBUG_DETAIL_ON
@@ -569,7 +581,7 @@ esp_err_t handler_prevalue(httpd_req_t *req)
}
else
{
zw = "SetPrevalue to " + tfliteflow.UpdatePrevalue(_size, _numbers);
zw = "SetPrevalue to " + tfliteflow.UpdatePrevalue(_size, _numbers, true);
}
resp_str = zw.c_str();
@@ -655,6 +667,11 @@ void TFliteDoAutoStart()
xTaskCreate(&task_autodoFlow, "task_autodoFlow", configMINIMAL_STACK_SIZE * 64, NULL, tskIDLE_PRIORITY+1, &xHandletask_autodoFlow);
}
std::string GetMQTTMainTopic()
{
return tfliteflow.GetMQTTMainTopic();
}
void register_server_tflite_uri(httpd_handle_t server)
@@ -679,6 +696,10 @@ void register_server_tflite_uri(httpd_handle_t server)
camuri.user_ctx = (void*) "Light Off";
httpd_register_uri_handler(server, &camuri);
camuri.uri = "/statusflow.html";
camuri.handler = handler_statusflow;
camuri.user_ctx = (void*) "Light Off";
httpd_register_uri_handler(server, &camuri);
camuri.uri = "/editflow.html";
camuri.handler = handler_editflow;

View File

@@ -1,4 +1,5 @@
#include <esp_log.h>
#include <string>
#include <esp_http_server.h>
#include "CImageBasis.h"
@@ -13,6 +14,8 @@ void TFliteDoAutoStart();
bool isSetupModusActive();
std::string GetMQTTMainTopic();
esp_err_t GetJPG(std::string _filename, httpd_req_t *req);
esp_err_t GetRawJPG(httpd_req_t *req);

View File

@@ -50,14 +50,14 @@ std::string std_hostname = "watermeter";
std::string ipadress = "";
std::string ssid = "";
std::string getIPAddress()
std::string* getIPAddress()
{
return ipadress;
return &ipadress;
}
std::string getSSID()
std::string* getSSID()
{
return ssid;
return &ssid;
}
@@ -115,14 +115,10 @@ static void event_handler(void* arg, esp_event_base_t event_base,
LEDBlinkTask(200, 1, true);
esp_wifi_connect();
} else if (event_base == WIFI_EVENT && event_id == WIFI_EVENT_STA_DISCONNECTED) {
if (s_retry_num < EXAMPLE_ESP_MAXIMUM_RETRY) {
// LEDBlinkTask(200, 1, true);
// if (s_retry_num < EXAMPLE_ESP_MAXIMUM_RETRY){
esp_wifi_connect();
s_retry_num++;
ESP_LOGI(TAG, "retry to connect to the AP");
} else {
xEventGroupSetBits(s_wifi_event_group, WIFI_FAIL_BIT);
}
ESP_LOGI(TAG,"connect to the AP fail");
} else if (event_base == IP_EVENT && event_id == IP_EVENT_STA_GOT_IP) {
ip_event_got_ip_t* event = (ip_event_got_ip_t*) event_data;

View File

@@ -7,8 +7,8 @@ void wifi_init_sta(const char *_ssid, const char *_password, const char *_hostna
void wifi_init_sta(const char *_ssid, const char *_password, const char *_hostname);
void wifi_init_sta(const char *_ssid, const char *_password);
std::string getIPAddress();
std::string getSSID();
std::string* getIPAddress();
std::string* getSSID();
extern std::string hostname;
extern std::string std_hostname;

BIN
code/components/tfmicro.zip Normal file

Binary file not shown.

View File

@@ -23,7 +23,7 @@ if(NOT DEFINED ENV{IDF_PATH})
endif()
idf_component_register(
SRCS tensorflow/lite/micro/simple_memory_allocator.cc tensorflow/lite/micro/micro_error_reporter.cc tensorflow/lite/micro/memory_helpers.cc tensorflow/lite/micro/test_helpers.cc tensorflow/lite/micro/recording_micro_allocator.cc tensorflow/lite/micro/micro_time.cc tensorflow/lite/micro/recording_simple_memory_allocator.cc tensorflow/lite/micro/micro_string.cc tensorflow/lite/micro/micro_profiler.cc tensorflow/lite/micro/debug_log.cc tensorflow/lite/micro/all_ops_resolver.cc tensorflow/lite/micro/micro_utils.cc tensorflow/lite/micro/micro_interpreter.cc tensorflow/lite/micro/micro_allocator.cc tensorflow/lite/micro/system_setup.cc tensorflow/lite/micro/memory_planner/linear_memory_planner.cc tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc tensorflow/lite/c/common.c tensorflow/lite/core/api/error_reporter.cc tensorflow/lite/core/api/flatbuffer_conversions.cc tensorflow/lite/core/api/op_resolver.cc tensorflow/lite/core/api/tensor_utils.cc tensorflow/lite/kernels/internal/quantization_util.cc tensorflow/lite/kernels/kernel_util.cc tensorflow/lite/schema/schema_utils.cc tensorflow/lite/micro/kernels/activations.cc tensorflow/lite/micro/kernels/add.cc tensorflow/lite/micro/kernels/add_n.cc tensorflow/lite/micro/kernels/arg_min_max.cc tensorflow/lite/micro/kernels/batch_to_space_nd.cc tensorflow/lite/micro/kernels/cast.cc tensorflow/lite/micro/kernels/ceil.cc tensorflow/lite/micro/kernels/circular_buffer.cc tensorflow/lite/micro/kernels/comparisons.cc tensorflow/lite/micro/kernels/concatenation.cc tensorflow/lite/micro/kernels/conv.cc tensorflow/lite/micro/kernels/conv_common.cc tensorflow/lite/micro/kernels/depthwise_conv.cc tensorflow/lite/micro/kernels/depthwise_conv_common.cc tensorflow/lite/micro/kernels/dequantize.cc tensorflow/lite/micro/kernels/detection_postprocess.cc tensorflow/lite/micro/kernels/div.cc tensorflow/lite/micro/kernels/elementwise.cc tensorflow/lite/micro/kernels/elu.cc tensorflow/lite/micro/kernels/ethosu.cc tensorflow/lite/micro/kernels/exp.cc tensorflow/lite/micro/kernels/expand_dims.cc tensorflow/lite/micro/kernels/fill.cc tensorflow/lite/micro/kernels/floor.cc tensorflow/lite/micro/kernels/fully_connected.cc tensorflow/lite/micro/kernels/fully_connected_common.cc tensorflow/lite/micro/kernels/hard_swish.cc tensorflow/lite/micro/kernels/kernel_runner.cc tensorflow/lite/micro/kernels/kernel_util.cc tensorflow/lite/micro/kernels/l2norm.cc tensorflow/lite/micro/kernels/l2_pool_2d.cc tensorflow/lite/micro/kernels/leaky_relu.cc tensorflow/lite/micro/kernels/logical.cc tensorflow/lite/micro/kernels/logistic.cc tensorflow/lite/micro/kernels/maximum_minimum.cc tensorflow/lite/micro/kernels/mul.cc tensorflow/lite/micro/kernels/neg.cc tensorflow/lite/micro/kernels/pack.cc tensorflow/lite/micro/kernels/pad.cc tensorflow/lite/micro/kernels/pooling.cc tensorflow/lite/micro/kernels/prelu.cc tensorflow/lite/micro/kernels/quantize.cc tensorflow/lite/micro/kernels/quantize_common.cc tensorflow/lite/micro/kernels/reduce.cc tensorflow/lite/micro/kernels/reshape.cc tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc tensorflow/lite/micro/kernels/round.cc tensorflow/lite/micro/kernels/shape.cc tensorflow/lite/micro/kernels/softmax.cc tensorflow/lite/micro/kernels/softmax_common.cc tensorflow/lite/micro/kernels/space_to_batch_nd.cc tensorflow/lite/micro/kernels/split.cc tensorflow/lite/micro/kernels/split_v.cc tensorflow/lite/micro/kernels/squeeze.cc tensorflow/lite/micro/kernels/strided_slice.cc tensorflow/lite/micro/kernels/sub.cc tensorflow/lite/micro/kernels/svdf.cc tensorflow/lite/micro/kernels/svdf_common.cc tensorflow/lite/micro/kernels/tanh.cc tensorflow/lite/micro/kernels/transpose_conv.cc tensorflow/lite/micro/kernels/unpack.cc tensorflow/lite/micro/kernels/zeros_like.cc
SRCS tensorflow/lite/micro/simple_memory_allocator.cc tensorflow/lite/micro/debug_log.cc tensorflow/lite/micro/micro_error_reporter.cc tensorflow/lite/micro/memory_helpers.cc tensorflow/lite/micro/test_helpers.cc tensorflow/lite/micro/recording_micro_allocator.cc tensorflow/lite/micro/micro_time.cc tensorflow/lite/micro/recording_simple_memory_allocator.cc tensorflow/lite/micro/micro_string.cc tensorflow/lite/micro/micro_profiler.cc tensorflow/lite/micro/flatbuffer_utils.cc tensorflow/lite/micro/micro_graph.cc tensorflow/lite/micro/mock_micro_graph.cc tensorflow/lite/micro/all_ops_resolver.cc tensorflow/lite/micro/micro_utils.cc tensorflow/lite/micro/micro_interpreter.cc tensorflow/lite/micro/micro_allocator.cc tensorflow/lite/micro/system_setup.cc tensorflow/lite/micro/memory_planner/linear_memory_planner.cc tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc tensorflow/lite/schema/schema_utils.cc tensorflow/lite/c/common.c tensorflow/lite/core/api/tensor_utils.cc tensorflow/lite/core/api/error_reporter.cc tensorflow/lite/core/api/flatbuffer_conversions.cc tensorflow/lite/core/api/op_resolver.cc tensorflow/lite/kernels/kernel_util.cc tensorflow/lite/kernels/internal/quantization_util.cc tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc tensorflow/lite/micro/kernels/activations.cc tensorflow/lite/micro/kernels/activations_common.cc tensorflow/lite/micro/kernels/add.cc tensorflow/lite/micro/kernels/add_n.cc tensorflow/lite/micro/kernels/arg_min_max.cc tensorflow/lite/micro/kernels/batch_to_space_nd.cc tensorflow/lite/micro/kernels/cast.cc tensorflow/lite/micro/kernels/ceil.cc tensorflow/lite/micro/kernels/circular_buffer.cc tensorflow/lite/micro/kernels/comparisons.cc tensorflow/lite/micro/kernels/concatenation.cc tensorflow/lite/micro/kernels/conv.cc tensorflow/lite/micro/kernels/conv_common.cc tensorflow/lite/micro/kernels/cumsum.cc tensorflow/lite/micro/kernels/depth_to_space.cc tensorflow/lite/micro/kernels/depthwise_conv.cc tensorflow/lite/micro/kernels/depthwise_conv_common.cc tensorflow/lite/micro/kernels/dequantize.cc tensorflow/lite/micro/kernels/detection_postprocess.cc tensorflow/lite/micro/kernels/elementwise.cc tensorflow/lite/micro/kernels/elu.cc tensorflow/lite/micro/kernels/ethosu.cc tensorflow/lite/micro/kernels/exp.cc tensorflow/lite/micro/kernels/expand_dims.cc tensorflow/lite/micro/kernels/fill.cc tensorflow/lite/micro/kernels/floor.cc tensorflow/lite/micro/kernels/floor_div.cc tensorflow/lite/micro/kernels/floor_mod.cc tensorflow/lite/micro/kernels/fully_connected.cc tensorflow/lite/micro/kernels/fully_connected_common.cc tensorflow/lite/micro/kernels/gather.cc tensorflow/lite/micro/kernels/gather_nd.cc tensorflow/lite/micro/kernels/hard_swish.cc tensorflow/lite/micro/kernels/hard_swish_common.cc tensorflow/lite/micro/kernels/if.cc tensorflow/lite/micro/kernels/kernel_runner.cc tensorflow/lite/micro/kernels/kernel_util.cc tensorflow/lite/micro/kernels/l2norm.cc tensorflow/lite/micro/kernels/l2_pool_2d.cc tensorflow/lite/micro/kernels/leaky_relu.cc tensorflow/lite/micro/kernels/logical.cc tensorflow/lite/micro/kernels/logical_common.cc tensorflow/lite/micro/kernels/logistic.cc tensorflow/lite/micro/kernels/logistic_common.cc tensorflow/lite/micro/kernels/log_softmax.cc tensorflow/lite/micro/kernels/maximum_minimum.cc tensorflow/lite/micro/kernels/mul.cc tensorflow/lite/micro/kernels/neg.cc tensorflow/lite/micro/kernels/pack.cc tensorflow/lite/micro/kernels/pad.cc tensorflow/lite/micro/kernels/pooling.cc tensorflow/lite/micro/kernels/pooling_common.cc tensorflow/lite/micro/kernels/prelu.cc tensorflow/lite/micro/kernels/quantize.cc tensorflow/lite/micro/kernels/quantize_common.cc tensorflow/lite/micro/kernels/reduce.cc tensorflow/lite/micro/kernels/reshape.cc tensorflow/lite/micro/kernels/resize_bilinear.cc tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc tensorflow/lite/micro/kernels/round.cc tensorflow/lite/micro/kernels/shape.cc tensorflow/lite/micro/kernels/softmax.cc tensorflow/lite/micro/kernels/softmax_common.cc tensorflow/lite/micro/kernels/space_to_batch_nd.cc tensorflow/lite/micro/kernels/space_to_depth.cc tensorflow/lite/micro/kernels/split.cc tensorflow/lite/micro/kernels/split_v.cc tensorflow/lite/micro/kernels/squeeze.cc tensorflow/lite/micro/kernels/strided_slice.cc tensorflow/lite/micro/kernels/sub.cc tensorflow/lite/micro/kernels/svdf.cc tensorflow/lite/micro/kernels/svdf_common.cc tensorflow/lite/micro/kernels/tanh.cc tensorflow/lite/micro/kernels/transpose.cc tensorflow/lite/micro/kernels/transpose_conv.cc tensorflow/lite/micro/kernels/unpack.cc tensorflow/lite/micro/kernels/zeros_like.cc
INCLUDE_DIRS . third_party/gemmlowp third_party/flatbuffers/include third_party/ruy)
# Reduce the level of paranoia to be able to compile TF sources
@@ -32,7 +32,7 @@ target_compile_options(${COMPONENT_LIB} PRIVATE
-Wno-missing-field-initializers
-Wno-type-limits)
target_compile_options(${COMPONENT_LIB} PRIVATE -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Werror -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP)
target_compile_options(${COMPONENT_LIB} PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -O3 -Werror -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP >)
target_compile_options(${COMPONENT_LIB} PRIVATE -Wimplicit-function-declaration -Werror -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP)
target_compile_options(${COMPONENT_LIB} PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -std=c++11 -fno-rtti -fno-exceptions -fno-threadsafe-statics -Werror -fno-unwind-tables -ffunction-sections -fdata-sections -fmessage-length=0 -DTF_LITE_STATIC_MEMORY -DTF_LITE_DISABLE_X86_NEON -Wsign-compare -Wdouble-promotion -Wshadow -Wunused-variable -Wmissing-field-initializers -Wunused-function -Wswitch -Wvla -Wall -Wextra -Wstrict-aliasing -Wno-unused-parameter -DESP >)
target_compile_options(${COMPONENT_LIB} INTERFACE $<$<IN_LIST:-DTF_LITE_STATIC_MEMORY,$<TARGET_PROPERTY:${COMPONENT_LIB},COMPILE_OPTIONS>>:-DTF_LITE_STATIC_MEMORY>)
target_link_libraries(${COMPONENT_LIB} PRIVATE -lm)

View File

@@ -63,7 +63,6 @@ typedef struct {
} TfLiteMirrorPaddingParams;
// Possible fused activation functions.
// TODO(aselle): rename to TfLiteActivation
typedef enum {
kTfLiteActNone = 0,
kTfLiteActRelu,
@@ -98,6 +97,8 @@ typedef struct {
TfLiteFusedActivation activation;
} TfLiteConv3DParams;
typedef TfLiteConv3DParams TfLiteConv3DTransposeParams;
typedef struct {
TfLitePadding padding;
int stride_width;
@@ -328,8 +329,9 @@ typedef struct {
} TfLitePadV2Params;
typedef struct {
// TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
// For now we will fix the maximum possible number of dimensions.
// These fields are only used in old models for backward compatibility.
// In the current implementation, we use the 2nd input of the op as the shape,
// and these fields are unused.
int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
int num_dimensions;
} TfLiteReshapeParams;
@@ -495,6 +497,11 @@ typedef struct {
TfLiteType value_dtype;
} TfLiteHashtableParams;
typedef struct {
const char* container;
const char* shared_name;
} TfLiteVarHandleParams;
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

View File

@@ -29,7 +29,9 @@ extern "C" {
// library.
#ifdef SWIG
#define TFL_CAPI_EXPORT
#else
#elif defined(TFL_STATIC_LIBRARY_BUILD)
#define TFL_CAPI_EXPORT
#else // not definded TFL_STATIC_LIBRARY_BUILD
#if defined(_WIN32)
#ifdef TFL_COMPILE_LIBRARY
#define TFL_CAPI_EXPORT __declspec(dllexport)
@@ -54,7 +56,19 @@ typedef enum TfLiteStatus {
// incompatibility between runtime and delegate, e.g., this error is returned
// when trying to apply a TfLite delegate onto a model graph that's already
// immutable.
kTfLiteApplicationError = 3
kTfLiteApplicationError = 3,
// Generally referring to serialized delegate data not being found.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataNotFound = 4,
// Generally referring to data-writing issues in delegate serialization.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataWriteError = 5,
// Generally referring to data-reading issues in delegate serialization.
// See tflite::delegates::Serialization.
kTfLiteDelegateDataReadError = 5,
} TfLiteStatus;
// Types supported by tensor

View File

@@ -45,8 +45,10 @@ int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
#ifndef TF_LITE_STATIC_MEMORY
TfLiteIntArray* TfLiteIntArrayCreate(int size) {
TfLiteIntArray* ret =
(TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size));
int alloc_size = TfLiteIntArrayGetSizeInBytes(size);
if (alloc_size <= 0) return NULL;
TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size);
if (!ret) return ret;
ret->size = size;
return ret;
}
@@ -181,9 +183,9 @@ void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
}
// TODO(b/145340303): Tensor data should be aligned.
if (!tensor->data.raw) {
tensor->data.raw = malloc(num_bytes);
tensor->data.raw = (char*)malloc(num_bytes);
} else if (num_bytes > tensor->bytes) {
tensor->data.raw = realloc(tensor->data.raw, num_bytes);
tensor->data.raw = (char*)realloc(tensor->data.raw, num_bytes);
}
tensor->bytes = num_bytes;
}
@@ -229,7 +231,7 @@ const char* TfLiteTypeGetName(TfLiteType type) {
return "Unknown type";
}
TfLiteDelegate TfLiteDelegateCreate() {
TfLiteDelegate TfLiteDelegateCreate(void) {
TfLiteDelegate d = {
.data_ = NULL,
.Prepare = NULL,

View File

@@ -456,8 +456,8 @@ typedef struct TfLiteTensor {
} TfLiteTensor;
// A structure representing an instance of a node.
// This structure only exhibits the inputs, outputs and user defined data, not
// other features like the type.
// This structure only exhibits the inputs, outputs, user defined data and some
// node properties (like statefulness), not other features like the type.
typedef struct TfLiteNode {
// Inputs to this node expressed as indices into the simulator's tensors.
TfLiteIntArray* inputs;
@@ -490,6 +490,9 @@ typedef struct TfLiteNode {
// created by calling `interpreter.ModifyGraphWithDelegate`.
// WARNING: This is an experimental interface that is subject to change.
struct TfLiteDelegate* delegate;
// Whether this op might have side effect (e.g. stateful op).
bool might_have_side_effect;
} TfLiteNode;
#else // defined(TF_LITE_STATIC_MEMORY)?
// NOTE: This flag is opt-in only at compile time.
@@ -640,6 +643,7 @@ typedef struct TfLiteContext {
// TfLiteDelegates can traverse the current execution plan by iterating
// through each member of this array and using GetNodeAndRegistration() to
// access details about a node. i.e.
//
// TfLiteIntArray* execution_plan;
// TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
// for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
@@ -648,6 +652,28 @@ typedef struct TfLiteContext {
// TfLiteRegistration* reg;
// context->GetNodeAndRegistration(context, node_index, &node, &reg);
// }
// Note: the memory pointed by '`*execution_plan` is OWNED by TfLite runtime.
// Future calls to GetExecutionPlan invalidates earlier outputs. The following
// code snippet shows the issue of such an invocation pattern. After calling
// CheckNode, subsequent access to `plan_1st` is undefined.
//
// void CheckNode(const TfLiteNode* node) {
// ...
// TfLiteIntArray* plan_2nd;
// TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan_2nd));
// ...
// }
//
// TfLiteIntArray* plan_1st;
// TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan_1st));
// for (int exec_index = 0; exec_index < plan_1st->size; exec_index++) {
// int node_index = plan_1st->data[exec_index];
// TfLiteNode* node;
// TfLiteRegistration* reg;
// context->GetNodeAndRegistration(context, node_index, &node, &reg);
// CheckNode(node);
// }
//
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
TfLiteIntArray** execution_plan);
@@ -777,6 +803,18 @@ typedef struct TfLiteContext {
// WARNING: This method may not be available on all platforms.
TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
int tensor_idx);
// Retrieves named metadata buffer from the TFLite model.
// Returns kTfLiteOk if metadata is successfully obtained from the flatbuffer
// Model: that is, there exists a `metadata` entry with given `name` string.
// (see TFLite's schema.fbs).
// The corresponding `buffer` information is populated in `ptr` & `bytes`.
// The data from `ptr` is valid for the lifetime of the Interpreter.
//
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus (*GetModelMetadata)(const struct TfLiteContext* context,
const char* name, const char** ptr,
size_t* bytes);
} TfLiteContext;
typedef struct TfLiteRegistration {
@@ -918,7 +956,7 @@ typedef struct TfLiteDelegate {
// Build a 'null' delegate, with all the fields properly set to their default
// values.
TfLiteDelegate TfLiteDelegateCreate();
TfLiteDelegate TfLiteDelegateCreate(void);
#ifdef __cplusplus
} // extern "C"

View File

@@ -373,6 +373,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
return ParseReducer(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_REDUCE_ALL: {
return ParseReducer(op, error_reporter, allocator, builtin_data);
}
case BuiltinOperator_REDUCE_MAX: {
return ParseReducer(op, error_reporter, allocator, builtin_data);
}
@@ -663,7 +667,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
return kTfLiteOk;
}
case BuiltinOperator_DELEGATE: {
// TODO(ycling): Revisit when supporting saving delegated models.
TF_LITE_REPORT_ERROR(error_reporter,
"DELEGATE op shouldn't exist in model.");
return kTfLiteError;
@@ -757,7 +760,8 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
*builtin_data = params.release();
return kTfLiteOk;
}
case BuiltinOperator_CONV_3D: {
case BuiltinOperator_CONV_3D:
case BuiltinOperator_CONV_3D_TRANSPOSE: {
auto params = safe_allocator.Allocate<TfLiteConv3DParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
if (const auto* conv3d_params = op->builtin_options_as_Conv3DOptions()) {
@@ -789,6 +793,21 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
*builtin_data = params.release();
return kTfLiteOk;
}
case BuiltinOperator_VAR_HANDLE: {
auto params = safe_allocator.Allocate<TfLiteVarHandleParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
params->container = nullptr;
params->shared_name = nullptr;
if (const auto* var_handle_params =
op->builtin_options_as_VarHandleOptions()) {
if (var_handle_params->container())
params->container = var_handle_params->container()->c_str();
if (var_handle_params->shared_name())
params->shared_name = var_handle_params->shared_name()->c_str();
}
*builtin_data = params.release();
return kTfLiteOk;
}
// Below are the ops with no builtin_data structure.
// TODO(aselle): Implement call in BuiltinOptions, but nullptrs are
// ok for now, since there is no call implementation either.
@@ -825,6 +844,9 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
case BuiltinOperator_HASHTABLE_FIND:
case BuiltinOperator_HASHTABLE_IMPORT:
case BuiltinOperator_HASHTABLE_SIZE:
case BuiltinOperator_READ_VARIABLE:
case BuiltinOperator_ASSIGN_VARIABLE:
case BuiltinOperator_BROADCAST_ARGS:
return kTfLiteOk;
case BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES:
return kTfLiteError;
@@ -1372,6 +1394,30 @@ TfLiteStatus ParseHardSwish(const Operator*, ErrorReporter*,
return kTfLiteOk;
}
TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data) {
CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
SafeBuiltinDataAllocator safe_allocator(allocator);
std::unique_ptr<TfLiteIfParams, SafeBuiltinDataAllocator::BuiltinDataDeleter>
params = safe_allocator.Allocate<TfLiteIfParams>();
TF_LITE_ENSURE(error_reporter, params != nullptr);
const IfOptions* schema_params = op->builtin_options_as_IfOptions();
if (schema_params != nullptr) {
params->then_subgraph_index = schema_params->then_subgraph_index();
params->else_subgraph_index = schema_params->else_subgraph_index();
} else {
// TODO(b/157480169): We should either return kTfLiteError or fill in some
// reasonable defaults in the params struct. We are not doing so until we
// better undertand the ramifications of changing the legacy behavior.
}
*builtin_data = params.release();
return kTfLiteOk;
}
TfLiteStatus ParseL2Normalization(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,

View File

@@ -181,6 +181,9 @@ TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,
void** builtin_data);
TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator, void** builtin_data);
TfLiteStatus ParseL2Normalization(const Operator* op,
ErrorReporter* error_reporter,
BuiltinDataAllocator* allocator,

View File

@@ -30,8 +30,7 @@ TfLiteStatus GetRegistrationFromOpCode(
auto builtin_code = GetBuiltinCode(opcode);
int version = opcode->version();
if (builtin_code > BuiltinOperator_MAX ||
builtin_code < BuiltinOperator_MIN) {
if (builtin_code > BuiltinOperator_MAX) {
TF_LITE_REPORT_ERROR(
error_reporter,
"Op builtin_code out of range: %d. Are you using old TFLite binary "

View File

@@ -46,6 +46,22 @@ class OpResolver {
}
virtual ~OpResolver() {}
private:
/// Returns true if this OpResolver may contain any "user defined" ops.
/// By "user defined" ops, we mean any op definitions other than those
/// contained in tflite::ops::builtin::BuiltinOpResolver.
///
/// If this method returns true, it doesn't necessarily mean that the
/// OpResolver contains a user-defined op, just that the absence of
/// user-defined ops can't be guaranteed.
///
/// Note that "user-defined" ops are not the same as "custom" ops;
/// BuiltinOpResolver may support certain "custom" ops, in addition to
/// "builtin" ops, and may not support all of the "builtin" op enum values.
virtual bool MayContainUserDefinedOps() const { return true; }
friend class OpResolverInternal;
};
// Handles the logic for converting between an OperatorCode structure extracted

View File

@@ -279,81 +279,125 @@ inline Integer FloorLog2(Integer n) {
}
}
// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
// softmax
// func - the function to build the LUT for (e.g exp(x))
// min,max - table limits
// table - pointer to buffer
// num - number of elements in the LUT
inline void gen_lut(double (*func)(double), double min, double max,
int16_t* table, const int num) {
// size of table should equal to num + 1
// last element only for slope calculation
double step = (max - min) / (num - 1);
double half_step = step / 2.0;
for (int i = 0; i < num - 1; i++) {
double sample_val = TfLiteRound(func(min + i * step) * 32768.0);
double midpoint_interp_val =
TfLiteRound((func(min + (i + 1) * step) * 32768.0 +
TfLiteRound(func(min + i * step) * 32768.0)) /
2.0);
double midpoint_val =
TfLiteRound(func(min + i * step + half_step) * 32768.0);
double midpoint_err = midpoint_interp_val - midpoint_val;
double bias = TfLiteRound(midpoint_err / 2.0);
table[i] = std::min<double>(std::max<double>(sample_val - bias, -32768.0),
32767.0);
}
table[num - 1] = std::min<double>(
std::max<double>(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
// The size of the LUT depends on the type of input. For int8 inputs a simple
// 256 entries LUT is used. For int16 inputs the high 9 bits are used for
// indexing and the 7 remaining bits are used for interpolation. We thus use a
// 513-entries LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry
// to interpolate the last value.
template <typename LutInT>
constexpr int lut_size() {
static_assert(std::is_same<LutInT, int8_t>::value ||
std::is_same<LutInT, int16_t>::value,
"Only LUTs with int8 or int16 inputs are supported.");
return std::is_same<LutInT, int8_t>::value ? 256 : 513;
}
// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
// softmax
// func - the function to build the LUT for (e.g exp(x))
// min,max - table limits
// table - pointer to buffer
// num - number of elements in the LUT
inline void gen_lut(float (*func)(float), float min, float max, int16_t* table,
const int num) {
// size of table should equal to num + 1
// last element only for slope calculation
float step = (max - min) / (num - 1);
float half_step = step / 2.0f;
for (int i = 0; i < num - 1; i++) {
float sample_val = TfLiteRound(func(min + i * step) * 32768.0f);
float midpoint_interp_val =
TfLiteRound((func(min + (i + 1) * step) * 32768.0f +
TfLiteRound(func(min + i * step) * 32768.0f)) /
2.0f);
float midpoint_val =
TfLiteRound(func(min + i * step + half_step) * 32768.0f);
float midpoint_err = midpoint_interp_val - midpoint_val;
float bias = TfLiteRound(midpoint_err / 2.0f);
table[i] = std::min<float>(std::max<float>(sample_val - bias, -32768.0f),
32767.0f);
// Generate a LUT for 'func' which can be used to approximate functions like
// exp, log, ...
//
// - func: the function to build the LUT for (e.g exp(x))
// - input_min, input_max: range of the func inputs
// - output_min, output_max: range of the func outputs
// - lut: pointer to the LUT table to fill, the table must be of size
// lut_size<LutInT>()
template <typename FloatT, typename LutInT, typename LutOutT>
inline void gen_lut(FloatT (*func)(FloatT), FloatT input_min, FloatT input_max,
FloatT output_min, FloatT output_max, LutOutT* lut) {
static_assert(std::is_same<LutInT, int8_t>::value ||
std::is_same<LutInT, int16_t>::value,
"Only LUTs with int8 or int16 inputs are supported.");
static_assert(std::is_same<LutOutT, int8_t>::value ||
std::is_same<LutOutT, int16_t>::value,
"Only LUTs with int8 or int16 outputs are supported.");
static_assert(std::is_floating_point<FloatT>::value,
"FloatT must be a floating-point type.");
const int nb_steps = std::is_same<LutInT, int8_t>::value ? 256 : 512;
const FloatT step = (input_max - input_min) / nb_steps;
const FloatT half_step = step / 2;
const FloatT output_scaling_inv =
static_cast<FloatT>(std::numeric_limits<LutOutT>::max() -
std::numeric_limits<LutOutT>::min() + 1) /
(output_max - output_min);
const FloatT table_min =
static_cast<FloatT>(std::numeric_limits<LutOutT>::min());
const FloatT table_max =
static_cast<FloatT>(std::numeric_limits<LutOutT>::max());
for (int i = 0; i < nb_steps; i++) {
const FloatT val = func(input_min + i * step);
const FloatT val_midpoint = func(input_min + i * step + half_step);
const FloatT val_next = func(input_min + (i + 1) * step);
const FloatT sample_val = TfLiteRound(val * output_scaling_inv);
const FloatT midpoint_interp_val =
TfLiteRound((val_next * output_scaling_inv +
TfLiteRound(val * output_scaling_inv)) /
2);
const FloatT midpoint_val = TfLiteRound(val_midpoint * output_scaling_inv);
const FloatT midpoint_err = midpoint_interp_val - midpoint_val;
const FloatT bias = TfLiteRound(midpoint_err / 2);
lut[i] = static_cast<LutOutT>(std::min<FloatT>(
std::max<FloatT>(sample_val - bias, table_min), table_max));
}
const bool with_extra_interpolation_value =
std::is_same<LutInT, int16_t>::value;
if (with_extra_interpolation_value) {
lut[nb_steps] = static_cast<LutOutT>(std::min<FloatT>(
std::max<FloatT>(TfLiteRound(func(input_max) * output_scaling_inv),
table_min),
table_max));
}
table[num - 1] = std::min<float>(
std::max<float>(TfLiteRound(func(max) * 32768.0f), -32768.0f), 32767.0f);
}
// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
// 512 base value, lut[513] only for calculate slope
uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
// LUT must have 513 values
template <typename LutOutT>
inline LutOutT lut_lookup_with_interpolation(int16_t value,
const LutOutT* lut) {
static_assert(std::is_same<LutOutT, int8_t>::value ||
std::is_same<LutOutT, int16_t>::value,
"Only LUTs with int8 or int16 outputs are supported.");
// 512 base values, lut[513] is only used to calculate the slope
const uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
assert(index < 512 && "LUT index out of range.");
int16_t offset = value & 0x7f;
const int16_t offset = value & 0x7f;
// base and slope are Q0.15
int16_t base = lut[index];
int16_t slope = lut[index + 1] - lut[index];
// Base and slope are Q0.x
const LutOutT base = lut[index];
const LutOutT slope = lut[index + 1] - lut[index];
// Q0.15 * Q0.7 = Q0.22
// Round and convert from Q0.22 to Q0.15
int32_t delta = (static_cast<int32_t>(slope) * offset + 64) >> 7;
// Q0.x * Q0.7 = Q0.(x + 7)
// Round and convert from Q0.(x + 7) to Q0.x
const int delta = (slope * offset + 64) >> 7;
// Q0.15 + Q0.15
return base + delta;
return static_cast<LutOutT>(base + delta);
}
// int16_t -> int16_t table lookup with interpolation
// LUT must have 513 values
inline int16_t lut_lookup(int16_t value, const int16_t* lut) {
return lut_lookup_with_interpolation(value, lut);
}
// int16_t -> int8_t table lookup with interpolation
// LUT must have 513 values
inline int8_t lut_lookup(int16_t value, const int8_t* lut) {
return lut_lookup_with_interpolation(value, lut);
}
// int8_t -> int8_t table lookup without interpolation
// LUT must have 256 values
inline int8_t lut_lookup(int8_t value, const int8_t* lut) {
return lut[128 + value];
}
// int8_t -> int16_t table lookup without interpolation
// LUT must have 256 values
inline int16_t lut_lookup(int8_t value, const int16_t* lut) {
return lut[128 + value];
}
// Table of sigmoid(i/24) at 0.16 format - 256 elements.
@@ -575,7 +619,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
// InputIntegerBits - z_b_headroom - 0.25);
const FixedPointAccum z_a_pow_2_adj = SaturatingAddNonGemmlowp(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
InputIntegerBits - z_a_headroom_plus_1, 31 - kAccumIntegerBits)),
static_cast<int32_t>(InputIntegerBits - z_a_headroom_plus_1),
31 - kAccumIntegerBits)),
shifted_quarter);
// z_b is treated like z_a, but premultiplying by sqrt(0.5).
@@ -585,7 +630,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl(
SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom);
const FixedPointAccum z_b_pow_2_adj = SaturatingSub(
FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam(
InputIntegerBits - z_b_headroom, 31 - kAccumIntegerBits)),
static_cast<int32_t>(InputIntegerBits - z_b_headroom),
31 - kAccumIntegerBits)),
shifted_quarter);
const FixedPoint0 r = FixedPoint0::FromRaw(std::min(r_a_raw, r_b_raw));

View File

@@ -19,9 +19,8 @@ limitations under the License.
namespace tflite {
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
defined(__ZEPHYR__)
#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
(defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__)
#define TF_LITE_GLOBAL_STD_PREFIX
#else
#define TF_LITE_GLOBAL_STD_PREFIX std

View File

@@ -15,26 +15,6 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define USE_NEON
#include <arm_neon.h>
#endif
#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
#define USE_NEON
#include "NEON_2_SSE.h"
#endif
// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
// defined, PortableSomeFunc(args) otherwise.
#ifdef USE_NEON
// Always use Neon code
#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
#else
// No NEON available: Use Portable code
#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
#endif // defined(USE_NEON)
// TFLM does not need to utilize any Neon optimizations.
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_

View File

@@ -15,6 +15,8 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
#include <type_traits>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
@@ -27,25 +29,14 @@ inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
T activation_min, activation_max;
GetActivationParams(params, &activation_min, &activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] + input2_data[i], params.quantized_activation_min,
params.quantized_activation_max);
}
}
inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const float* input1_data,
const RuntimeShape& input2_shape, const float* input2_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; i++) {
auto x = input1_data[i] + input2_data[i];
output_data[i] = ActivationFunctionWithMinMax(
x, params.float_activation_min, params.float_activation_max);
input1_data[i] + input2_data[i], activation_min, activation_max);
}
}
@@ -202,13 +193,12 @@ inline void Add(const ArithmeticParams& params,
}
}
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const float* input1_data,
const RuntimeShape& input2_shape,
const float* input2_data,
const RuntimeShape& output_shape,
float* output_data) {
template <typename T>
inline typename std::enable_if<!is_small_integer<T>::value, void>::type
BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -216,6 +206,9 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
T activation_min, activation_max;
GetActivationParams(params, &activation_min, &activation_max);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
@@ -232,51 +225,10 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
output_data[Offset(extended_output_shape, b, y, x, c)] =
ActivationFunctionWithMinMax(
ActivationFunctionWithMinMax<T>(
input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
input2_data[SubscriptToIndex(desc2, b, y, x, c)],
params.float_activation_min, params.float_activation_max);
}
}
}
}
}
inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int32_t* input1_data,
const RuntimeShape& input2_shape,
const int32_t* input2_data,
const RuntimeShape& output_shape,
int32_t* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
&desc2);
const RuntimeShape extended_output_shape =
RuntimeShape::ExtendedShape(4, output_shape);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest stride,
// typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
//
// We name our variables by their Tensorflow convention, but generate C code
// nesting loops such that the innermost loop has the smallest stride for the
// best cache behavior.
for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
output_data[Offset(extended_output_shape, b, y, x, c)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
input2_data[SubscriptToIndex(desc2, b, y, x, c)],
params.quantized_activation_min,
params.quantized_activation_max);
activation_min, activation_max);
}
}
}
@@ -287,10 +239,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void BroadcastAdd4DSlow(
const ArithmeticParams& params, const RuntimeShape& input1_shape,
const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
inline typename std::enable_if<is_small_integer<T>::value, void>::type
BroadcastAdd4DSlow(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,

View File

@@ -15,7 +15,10 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
#include "tensorflow/lite/kernels/internal/types.h"
#include <algorithm>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
@@ -36,6 +39,47 @@ inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
}
}
inline void AddN(const ArithmeticParams& params,
const RuntimeShape& input_shape, const size_t num_inputs,
const int8_t* const* input_data, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is negative input zero point. Activation tensors are
// asymmetric quantized so they span the full int8 range.
// All inputs should have same zero-point and scale, this is checked during
// Prepare stage.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
// All inputs and output should have the same shape, this is checked during
// Prepare stage.
const size_t size = input_shape.FlatSize();
for (size_t i = 0; i < size; ++i) {
// accumulate in scaled_x before clamping to avoid overflow
const int32_t x = params.input1_offset; // x = 0
const int32_t shifted_x = x * (1 << params.left_shift);
int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_x, params.input1_multiplier, params.input1_shift);
for (size_t j = 0; j < num_inputs; ++j) {
const int32_t y = params.input1_offset + input_data[j][i];
const int32_t shifted_y = y * (1 << params.left_shift);
int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_y, params.input1_multiplier, params.input1_shift);
scaled_x += scaled_y;
}
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
scaled_x, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<int8_t>(clamped_output);
}
}
} // namespace reference_ops
} // namespace tflite

View File

@@ -0,0 +1,275 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/tensor_utils_common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
namespace batch_matmul {
// Determine which dimension is the broadcast dimension.
inline int broadcast_dim(int lhs_dim, int rhs_dim) {
if (lhs_dim == rhs_dim) return lhs_dim;
if (lhs_dim == 1) return rhs_dim;
TFLITE_DCHECK_EQ(rhs_dim, 1);
return lhs_dim;
}
// Compute the "extent" for iterating on this dimension.
// If we are broadcasting, then don't advance (i.e return 0).
inline int extent(const RuntimeShape& shape, int x) {
if (shape.Dims(x) == 1) {
return 0;
}
int prod = 1;
for (int i = x + 1; i < shape.DimensionsCount(); ++i) {
prod *= shape.Dims(i);
}
return prod;
}
} // namespace batch_matmul
template <typename Ta, typename Tb, typename Tout>
inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data,
const RuntimeShape& rhs_shape, const Tb* rhs_data,
const RuntimeShape& output_shape, Tout* output_data) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const Ta* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const Tb* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const Ta* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const Tb* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const Ta* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const Tb* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
Tout* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
for (int i = 0; i < lhs_rows; ++i) {
Tout total = 0;
for (int k = 0; k < accum_depth; ++k) {
total += static_cast<Tout>(lhs_ptr2[accum_depth * i + k]) *
static_cast<Tout>(rhs_ptr2[j * accum_depth + k]);
}
int idx = lhs_rows * j + i;
out_ptr[idx] = total;
}
}
}
}
}
}
inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data,
const RuntimeShape& rhs_shape, const int8_t* rhs_data,
const float* scaling_factors,
const int32_t* input_offset, int32_t* row_sums,
const RuntimeShape& output_shape, float* output_data,
bool* compute_row_sums) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols;
const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols;
const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols;
const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows;
const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows;
const int woff_ext2 = lhs_ext2 == 0 ? 0 : lhs_rows;
if (!compute_row_sums || *compute_row_sums) {
int num_weights_matrices = 1;
for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) {
num_weights_matrices *= extended_lhs_shape.Dims(i);
}
tensor_utils::ReductionSumVector(
lhs_data, row_sums, num_weights_matrices * lhs_rows, accum_depth);
if (compute_row_sums) {
*compute_row_sums = false;
}
}
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0);
const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0);
const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1);
const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1);
const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1);
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2);
const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2);
const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2);
float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) +
b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
const float batch_scaling_factor = scale_ptr2[j];
const float batch_offset = static_cast<float>(ioff_ptr2[j]);
for (int i = 0; i < lhs_rows; ++i) {
int32_t total = 0;
for (int k = 0; k < accum_depth; ++k) {
total +=
lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k];
}
int32_t row_sum = woff_ptr2[i];
total -= row_sum * batch_offset;
int idx = lhs_rows * j + i;
out_ptr[idx] += batch_scaling_factor * total;
}
}
}
}
}
}
template <typename T, typename AccumT>
inline void BatchMatMul(const FullyConnectedParams& params,
const RuntimeShape& lhs_shape, const T* lhs_data,
const RuntimeShape& rhs_shape, const T* rhs_data,
const RuntimeShape& output_shape, T* output_data) {
const RuntimeShape extended_lhs_shape =
RuntimeShape::ExtendedShape(5, lhs_shape);
const RuntimeShape extended_rhs_shape =
RuntimeShape::ExtendedShape(5, rhs_shape);
const int batch_dim0 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0));
const int batch_dim1 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1));
const int batch_dim2 = batch_matmul::broadcast_dim(
extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2));
const int lhs_ext0 = batch_matmul::extent(extended_lhs_shape, 0);
const int lhs_ext1 = batch_matmul::extent(extended_lhs_shape, 1);
const int lhs_ext2 = batch_matmul::extent(extended_lhs_shape, 2);
const int rhs_ext0 = batch_matmul::extent(extended_rhs_shape, 0);
const int rhs_ext1 = batch_matmul::extent(extended_rhs_shape, 1);
const int rhs_ext2 = batch_matmul::extent(extended_rhs_shape, 2);
// Set params for each matrix multiply.
const int lhs_rows = extended_lhs_shape.Dims(3);
const int rhs_cols = extended_rhs_shape.Dims(4);
const int accum_depth = extended_lhs_shape.Dims(4);
const int32_t input_offset = params.input_offset;
const int32_t filter_offset = params.weights_offset;
const int32_t output_offset = params.output_offset;
const int32_t output_multiplier = params.output_multiplier;
const int output_shift = params.output_shift;
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
for (int b0 = 0; b0 < batch_dim0; ++b0) {
const T* lhs_ptr0 = lhs_data + (b0 * lhs_ext0);
const T* rhs_ptr0 = rhs_data + (b0 * rhs_ext0);
for (int b1 = 0; b1 < batch_dim1; ++b1) {
const T* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1;
const T* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1;
for (int b2 = 0; b2 < batch_dim2; ++b2) {
const T* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2;
const T* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2;
T* out_ptr = output_data +
((b0 * batch_dim1 * batch_dim2) + b1 * batch_dim2 + b2) *
lhs_rows * rhs_cols;
for (int j = 0; j < rhs_cols; ++j) {
for (int i = 0; i < lhs_rows; ++i) {
AccumT total = 0;
for (int k = 0; k < accum_depth; ++k) {
AccumT lhs_val = lhs_ptr2[accum_depth * i + k];
AccumT rhs_val = rhs_ptr2[accum_depth * j + k];
total += (lhs_val + filter_offset) * (rhs_val + input_offset);
}
int32_t total_scaled = MultiplyByQuantizedMultiplier(
total, output_multiplier, output_shift);
total_scaled += output_offset;
total_scaled = std::max(total_scaled, output_activation_min);
total_scaled = std::min(total_scaled, output_activation_max);
const int idx = lhs_rows * j + i;
out_ptr[idx] = static_cast<T>(total_scaled);
}
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_

View File

@@ -0,0 +1,175 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
#include <algorithm>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
bool exclusive, bool reverse, T* output_data) {
const int32_t rank = shape.DimensionsCount();
TFLITE_DCHECK_GE(rank, 1);
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, rank);
size_t inner = 1;
size_t outer = 1;
size_t depth = 1;
for (int32_t i = 0; i < rank; i++) {
if (i < axis)
inner *= shape.Dims(i);
else if (i > axis)
outer *= shape.Dims(i);
else
depth = shape.Dims(i);
}
for (size_t outer_index = 0; outer_index < outer; outer_index++) {
size_t outer_index_adj;
if (reverse)
outer_index_adj = (outer - 1) - outer_index;
else
outer_index_adj = outer_index;
for (size_t inner_index = 0; inner_index < inner; inner_index++) {
T accumulator = 0;
size_t inner_index_adj;
if (reverse)
inner_index_adj = (inner - 1) - inner_index;
else
inner_index_adj = inner_index;
for (size_t depth_index = 0; depth_index < depth; depth_index++) {
size_t depth_index_adj;
if (reverse)
depth_index_adj = (depth - 1) - depth_index;
else
depth_index_adj = depth_index;
size_t index = outer_index_adj;
index += inner_index_adj * depth * outer;
index += depth_index_adj * outer;
if (exclusive) {
output_data[index] = accumulator;
accumulator += input_data[index];
} else {
accumulator += input_data[index];
output_data[index] = accumulator;
}
}
}
}
}
//
// Quantized INT8 CUMSUM
//
inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
const RuntimeShape& shape, int32_t axis, bool exclusive,
bool reverse, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is negative input zero point. Activation tensors are
// asymmetric quantized so they span the full int8 range.
// All inputs should have same zero-point and scale, this is checked during
// Prepare stage.
TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
const int32_t rank = shape.DimensionsCount();
TFLITE_DCHECK_GE(rank, 1);
TFLITE_DCHECK_GE(axis, 0);
TFLITE_DCHECK_LT(axis, rank);
size_t inner = 1;
size_t outer = 1;
size_t depth = 1;
for (int32_t i = 0; i < rank; i++) {
if (i < axis)
inner *= shape.Dims(i);
else if (i > axis)
outer *= shape.Dims(i);
else
depth = shape.Dims(i);
}
for (size_t outer_index = 0; outer_index < outer; outer_index++) {
size_t outer_index_adj;
if (reverse)
outer_index_adj = (outer - 1) - outer_index;
else
outer_index_adj = outer_index;
for (size_t inner_index = 0; inner_index < inner; inner_index++) {
int32_t accumulator = params.input1_offset; // accumulator = 0
accumulator *= (1 << params.left_shift);
accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
accumulator, params.input1_multiplier, params.input1_shift);
size_t inner_index_adj;
if (reverse)
inner_index_adj = (inner - 1) - inner_index;
else
inner_index_adj = inner_index;
for (size_t depth_index = 0; depth_index < depth; depth_index++) {
size_t depth_index_adj;
if (reverse)
depth_index_adj = (depth - 1) - depth_index;
else
depth_index_adj = depth_index;
size_t index = outer_index_adj;
index += inner_index_adj * depth * outer;
index += depth_index_adj * outer;
const int32_t y = params.input1_offset + input_data[index];
const int32_t shifted_y = y * (1 << params.left_shift);
const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
shifted_y, params.input1_multiplier, params.input1_shift);
int32_t scaled_output;
if (exclusive) {
scaled_output = accumulator;
accumulator += scaled_y;
} else {
accumulator += scaled_y;
scaled_output = accumulator;
}
const int32_t raw_output =
MultiplyByQuantizedMultiplierSmallerThanOneExp(
scaled_output, params.output_multiplier, params.output_shift) +
params.output_offset;
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output));
output_data[index] = static_cast<int8_t>(clamped_output);
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_

View File

@@ -0,0 +1,79 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int input_depth = input_shape.Dims(3);
const int input_width = input_shape.Dims(2);
const int input_height = input_shape.Dims(1);
const int input_batch = input_shape.Dims(0);
const int output_depth = output_shape.Dims(3);
const int output_width = output_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_batch = output_shape.Dims(0);
const int32_t block_size = op_params.block_size;
TFLITE_DCHECK_EQ(input_width * block_size, output_width);
TFLITE_DCHECK_EQ(input_height * block_size, output_height);
TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size);
TFLITE_DCHECK_EQ(input_batch, output_batch);
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_h = 0; out_h < output_height; ++out_h) {
for (int out_w = 0; out_w < output_width; ++out_w) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
const int in_d =
out_d + ((out_h % block_size) * block_size + out_w % block_size) *
output_depth;
const int in_w = out_w / block_size;
const int in_h = out_h / block_size;
const int in_b = out_b;
const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
const int output_index =
Offset(output_shape, out_b, out_h, out_w, out_d);
output_data[output_index] = input_data[input_index];
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_

View File

@@ -1,239 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_
#include <algorithm>
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void DivCheckArithmeticParams(const ArithmeticParams& params) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
// Input offset is negative input zero point. Activation tensors are
// asymmetric quantized so they span the full int8 range.
constexpr int32_t max_value =
static_cast<int32_t>(std::numeric_limits<T>::max());
TFLITE_DCHECK_GE(params.input1_offset, -max_value);
TFLITE_DCHECK_LE(params.input1_offset, max_value);
TFLITE_DCHECK_GE(params.input2_offset, -max_value);
TFLITE_DCHECK_LE(params.input2_offset, max_value);
TFLITE_DCHECK_GE(params.output_offset, -max_value);
TFLITE_DCHECK_LE(params.output_offset, max_value);
}
// Element-wise div that can often be used for inner loop of broadcast Div as
// well as the non-broadcast Div.
template <typename T>
inline void DivElementwise(int size, const ArithmeticParams& params,
const T* input1_data, const T* input2_data,
T* output_data) {
DivCheckArithmeticParams<T>(params);
for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i];
const int32_t input2_val = params.input2_offset + input2_data[i];
TFLITE_DCHECK_NE(input2_val, 0);
int recip_shift;
const int32_t input2_inv =
(input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
: -GetReciprocal(-input2_val, 31, &recip_shift);
const int headroom = CountLeadingSignBits(input1_val);
const int32_t unscaled_quotient =
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
headroom);
const int total_shift = params.output_shift - recip_shift - headroom;
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplierSmallerThanOneExp(
unscaled_quotient, params.output_multiplier, total_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[i] = static_cast<T>(clamped_output);
}
}
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const uint8_t* input1_data,
const RuntimeShape& input2_shape, const uint8_t* input2_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int8_t* input1_data,
const RuntimeShape& input2_shape, const int8_t* input2_data,
const RuntimeShape& output_shape, int8_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
DivElementwise(flat_size, params, input1_data, input2_data, output_data);
}
template <typename T, int N = 5>
inline void BroadcastDivSlowQuantized(
const ArithmeticParams& params, const RuntimeShape& unextended_input1_shape,
const T* input1_data, const RuntimeShape& unextended_input2_shape,
const T* input2_data, const RuntimeShape& unextended_output_shape,
T* output_data) {
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
DivCheckArithmeticParams<T>(params);
auto div_func = [&](int indexes[N]) {
const int32_t input1_val =
params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
const int32_t input2_val =
params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
TFLITE_DCHECK_NE(input2_val, 0);
int recip_shift;
const int32_t input2_inv =
(input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
: -GetReciprocal(-input2_val, 31, &recip_shift);
const int headroom = CountLeadingSignBits(input1_val);
const int32_t unscaled_quotient =
MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
headroom);
const int total_shift = params.output_shift - recip_shift - headroom;
const int32_t unclamped_result =
params.output_offset +
MultiplyByQuantizedMultiplierSmallerThanOneExp(
unscaled_quotient, params.output_multiplier, total_shift);
const int32_t clamped_output =
std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, unclamped_result));
output_data[SubscriptToIndex(output_desc, indexes)] =
static_cast<T>(clamped_output);
};
NDOpsHelper<N>(output_desc, div_func);
}
template <int N = 5>
inline void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const uint8_t* input1_data,
const RuntimeShape& unextended_input2_shape,
const uint8_t* input2_data,
const RuntimeShape& unextended_output_shape,
uint8_t* output_data) {
BroadcastDivSlowQuantized<uint8_t, N>(
params, unextended_input1_shape, input1_data, unextended_input2_shape,
input2_data, unextended_output_shape, output_data);
}
template <int N = 5>
inline void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const int8_t* input1_data,
const RuntimeShape& unextended_input2_shape,
const int8_t* input2_data,
const RuntimeShape& unextended_output_shape,
int8_t* output_data) {
BroadcastDivSlowQuantized<int8_t, N>(
params, unextended_input1_shape, input1_data, unextended_input2_shape,
input2_data, unextended_output_shape, output_data);
}
// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
// dimensionality if the runtime code does a single loop over one dimension
// that handles broadcasting as the base case. The code generator would then
// generate max(D1, D2) nested for loops.
template <typename T, int N = 5>
void BroadcastDivSlow(const ArithmeticParams& params,
const RuntimeShape& unextended_input1_shape,
const T* input1_data,
const RuntimeShape& unextended_input2_shape,
const T* input2_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
T output_activation_min;
T output_activation_max;
GetActivationParams(params, &output_activation_min, &output_activation_max);
TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
NdArrayDesc<N> desc1;
NdArrayDesc<N> desc2;
NdArrayDesc<N> output_desc;
NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
unextended_input2_shape, &desc1, &desc2);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
// In Tensorflow, the dimensions are canonically named (batch_number, row,
// col, channel), with extents (batches, height, width, depth), with the
// trailing dimension changing most rapidly (channels has the smallest
// stride, typically 1 element).
//
// In generated C code, we store arrays with the dimensions reversed. The
// first dimension has smallest stride.
auto div_func = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
ActivationFunctionWithMinMax(
input1_data[SubscriptToIndex(desc1, indexes)] /
input2_data[SubscriptToIndex(desc2, indexes)],
output_activation_min, output_activation_max);
};
NDOpsHelper<N>(output_desc, div_func);
}
template <typename T>
inline void Div(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const T* input1_data,
const RuntimeShape& input2_shape, const T* input2_data,
const RuntimeShape& output_shape, T* output_data) {
T output_activation_min;
T output_activation_max;
GetActivationParams(params, &output_activation_min, &output_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] / input2_data[i], output_activation_min,
output_activation_max);
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DIV_H_

View File

@@ -0,0 +1,35 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
#include <cmath>
#include <functional>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
T FloorDiv(T input1, T input2) {
return std::floor(std::divides<double>()(static_cast<double>(input1),
static_cast<double>(input2)));
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_

View File

@@ -0,0 +1,44 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
#include <cmath>
#include <functional>
namespace tflite {
namespace reference_ops {
template <typename T>
T FloorMod(T input1, T input2) {
struct FloatMod {
float operator()(const float lhs, const float rhs) const {
return std::fmod(lhs, rhs);
}
};
using ModFunc = typename std::conditional<std::is_integral<T>::value,
std::modulus<T>, FloatMod>::type;
ModFunc mod_func;
T trunc_mod = mod_func(input1, input2);
return (trunc_mod != 0) && ((input2 < 0) != (trunc_mod < 0))
? (trunc_mod + input2)
: trunc_mod;
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_

View File

@@ -21,7 +21,7 @@ limitations under the License.
namespace tflite {
namespace reference_integer_ops {
inline void AveragePool(const PoolParams& params,
inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
@@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params,
filter_count++;
}
}
if (filter_count == 0) return false;
// Round to the closest integer value.
acc = acc > 0 ? (acc + filter_count / 2) / filter_count
: (acc - filter_count / 2) / filter_count;
@@ -77,6 +78,7 @@ inline void AveragePool(const PoolParams& params,
}
}
}
return true;
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
@@ -136,7 +138,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
}
}
inline void AveragePool(const PoolParams& params,
inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const int16_t* input_data,
const RuntimeShape& output_shape,
@@ -182,6 +184,7 @@ inline void AveragePool(const PoolParams& params,
filter_count++;
}
}
if (filter_count == 0) return false;
// Round to the closest integer value.
acc = acc > 0 ? (acc + filter_count / 2) / filter_count
: (acc - filter_count / 2) / filter_count;
@@ -193,6 +196,7 @@ inline void AveragePool(const PoolParams& params,
}
}
}
return true;
}
inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,

View File

@@ -0,0 +1,256 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_
#include <algorithm>
#include <cstddef>
#include <limits>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
namespace tflite {
namespace reference_ops {
inline void LogSoftmax(const SoftmaxParams& params,
const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
// Find max element value which we'll use to ensure numerical stability
// taking advantage of the following equality:
// log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C)))
float max = std::numeric_limits<float>::lowest();
for (int c = 0; c < depth; ++c) {
max = std::max(max, input_data[i * depth + c]);
}
// Compute sum.
float sum = 0.f;
for (int c = 0; c < depth; ++c) {
sum += std::exp(input_data[i * depth + c] - max);
}
// Compute result.
const float log_sum = std::log(sum);
for (int c = 0; c < depth; ++c) {
output_data[i * depth + c] = input_data[i * depth + c] - max - log_sum;
}
}
}
inline void LogSoftmax(const SoftmaxParams& params,
const RuntimeShape& input_shape,
const uint8_t* input_data,
const RuntimeShape& output_shape, uint8_t* output_data) {
const int32_t input_multiplier = params.input_multiplier;
const int32_t input_left_shift = params.input_left_shift;
const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
const int32_t reverse_scaling_right_shift =
params.reverse_scaling_right_shift;
const int diff_min = params.diff_min;
// The representation chosen for the input to the exp() function is Q5.26.
// We need to leave extra space since values that we skip might be as large
// as -32 before multiplying by input_beta_multiplier, and therefore as
// large as -16 afterwards. Note that exp(-8) is definitely not
// insignificant to accumulation, but exp(-16) definitely is.
static constexpr int kScaledDiffIntegerBits = 5;
static constexpr int kAccumulationIntegerBits = 12;
static constexpr int kOutputIntegerBits = 4;
using FixedPointScaledDiff =
gemmlowp::FixedPoint<int32_t, kScaledDiffIntegerBits>;
using FixedPointAccum =
gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
for (int i = 0; i < outer_size; ++i) {
uint8_t max_in_row = 0;
for (int c = 0; c < depth; ++c) {
max_in_row = std::max(max_in_row, input_data[i * depth + c]);
}
FixedPointAccum sum_of_exps = FixedPointAccum::Zero();
for (int c = 0; c < depth; ++c) {
int32_t input_diff =
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
if (input_diff >= diff_min) {
const int32_t input_diff_rescaled =
MultiplyByQuantizedMultiplierGreaterThanOne(
input_diff, input_multiplier, input_left_shift);
const FixedPointScaledDiff scaled_diff_f8 =
FixedPointScaledDiff::FromRaw(input_diff_rescaled);
sum_of_exps = sum_of_exps + gemmlowp::Rescale<kAccumulationIntegerBits>(
exp_on_negative_values(scaled_diff_f8));
}
}
const int32_t fixed_log_sum_of_exps =
log_x_for_x_greater_than_or_equal_to_1<kScaledDiffIntegerBits>(
sum_of_exps)
.raw();
// rescaled_diff_min is smallest representable in
// Q(kScaledDiffIntegerBits).(31-kScaledDiffIntegerBits) plus the
// log-sub-exps that will be subtracted in the loop.
//
// The thresholds diff_min, etc are negative.
const int rescaled_diff_min =
fixed_log_sum_of_exps + std::numeric_limits<int32_t>::lowest();
const int adjusted_diff_min =
std::max(static_cast<int32_t>(
diff_min - 1), // Note use of > below instead of >= above.
MultiplyByQuantizedMultiplierSmallerThanOneExp(
rescaled_diff_min, reverse_scaling_divisor,
-reverse_scaling_right_shift));
for (int c = 0; c < depth; ++c) {
int32_t input_diff =
static_cast<int32_t>(input_data[i * depth + c]) - max_in_row;
if (input_diff > adjusted_diff_min) {
const int32_t input_diff_rescaled =
MultiplyByQuantizedMultiplierGreaterThanOne(
input_diff, input_multiplier, input_left_shift);
int32_t unsat_output =
gemmlowp::RoundingDivideByPOT(
(input_diff_rescaled - fixed_log_sum_of_exps),
31 - kScaledDiffIntegerBits - kOutputIntegerBits) +
255;
output_data[i * depth + c] = static_cast<uint8_t>(
std::max(std::min(unsat_output, static_cast<int32_t>(255)),
static_cast<int32_t>(0)));
} else {
// Set output to smallest value.
output_data[i * depth + c] = 0;
}
}
}
}
template <typename T>
inline void LogSoftmaxQuantized(const SoftmaxParams& params,
const size_t outer_size, const size_t depth,
const RuntimeShape& input_shape,
const T* input_data,
const RuntimeShape& output_shape,
T* output_data) {
const int32_t input_multiplier = params.input_multiplier;
const int32_t input_left_shift = params.input_left_shift;
const int32_t reverse_scaling_divisor = params.reverse_scaling_divisor;
const int32_t reverse_scaling_right_shift =
params.reverse_scaling_right_shift;
const int diff_min = params.diff_min;
static constexpr T kMinT8 = std::numeric_limits<T>::min();
static constexpr T kMaxT8 = std::numeric_limits<T>::max();
static constexpr int32_t kMinInt32 = std::numeric_limits<int32_t>::min();
// All IntegerBits must agree with Prepare function.
// Input is chosen as Q5.26 so exp(-1 * 2^5 * 2^-1) = exp(-16) is negligible.
static constexpr int kInputIntegerBits = 5;
static constexpr int kAccumulationIntegerBits = 12;
static constexpr int kOutputIntegerBits = 4;
using F5 = gemmlowp::FixedPoint<int32_t, kInputIntegerBits>;
using F12 = gemmlowp::FixedPoint<int32_t, kAccumulationIntegerBits>;
for (size_t outer_index = 0; outer_index < outer_size; ++outer_index) {
T max_in_row = kMinT8;
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
max_in_row =
std::max(max_in_row, input_data[outer_index * depth + inner_index]);
}
// Accumulator "sum_of_exps_in_q12" is safe from overflowing in 2^12 steps.
F12 sum_of_exps_in_q12 = F12::FromRaw(0);
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input_diff =
static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
max_in_row;
if (input_diff >= diff_min) {
const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
input_diff, input_multiplier, input_left_shift);
sum_of_exps_in_q12 =
sum_of_exps_in_q12 +
gemmlowp::Rescale<kAccumulationIntegerBits>(
exp_on_negative_values(F5::FromRaw(input_diff_in_q5)));
}
}
const int32_t log_sum_of_exps_in_q5 =
log_x_for_x_greater_than_or_equal_to_1<kInputIntegerBits>(
sum_of_exps_in_q12)
.raw();
// Potentially reduced the valid range. shifted_log_sum_of_exps_in_q5 is
// smallest representable in Q5.26 plus the log_sum_of_exps.
const int32_t shifted_log_sum_of_exps_in_q5 =
log_sum_of_exps_in_q5 + kMinInt32;
const int32_t adjusted_diff_min =
std::max(static_cast<int32_t>(diff_min - 1),
MultiplyByQuantizedMultiplier(shifted_log_sum_of_exps_in_q5,
reverse_scaling_divisor,
-reverse_scaling_right_shift));
for (size_t inner_index = 0; inner_index < depth; ++inner_index) {
int32_t input_diff =
static_cast<int32_t>(input_data[outer_index * depth + inner_index]) -
max_in_row;
// Note use of > below instead of >= above.
if (input_diff > adjusted_diff_min) {
const int32_t input_diff_in_q5 = MultiplyByQuantizedMultiplier(
input_diff, input_multiplier, input_left_shift);
// Rescale and downcast.
int32_t output_in_q27 =
gemmlowp::RoundingDivideByPOT(
(input_diff_in_q5 - log_sum_of_exps_in_q5),
31 - kInputIntegerBits - kOutputIntegerBits) +
kMaxT8;
output_in_q27 =
std::max(std::min(output_in_q27, static_cast<int32_t>(kMaxT8)),
static_cast<int32_t>(kMinT8));
output_data[outer_index * depth + inner_index] =
static_cast<T>(output_in_q27);
} else {
output_data[outer_index * depth + inner_index] = kMinT8;
}
}
}
}
inline void LogSoftmax(const SoftmaxParams& params, const size_t outer_size,
const size_t depth, const RuntimeShape& input_shape,
const int8_t* input_data,
const RuntimeShape& output_shape, int8_t* output_data) {
LogSoftmaxQuantized(params, outer_size, depth, input_shape, input_data,
output_shape, output_data);
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOG_SOFTMAX_H_

View File

@@ -51,7 +51,7 @@ inline void Mul(const ArithmeticParams& params,
GetActivationParams(params, &output_activation_min, &output_activation_max);
const int flat_size =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
output_data[i] = ActivationFunctionWithMinMax(
input1_data[i] * input2_data[i], output_activation_min,
@@ -66,7 +66,7 @@ inline void Mul(const ArithmeticParams& params,
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingFlatSize(input1_shape, input2_shape, output_shape);
MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape);
MulElementwise(flat_size, params, input1_data, input2_data, output_data);
}

View File

@@ -24,8 +24,8 @@ namespace tflite {
namespace reference_ops {
// TFLite Pad supports activation tensors with up to 4 dimensions.
constexpr int PadKernelMaxDimensionCount() { return 4; }
// TFLite Pad supports activation tensors with up to 5 dimensions.
constexpr int PadKernelMaxDimensionCount() { return 5; }
// There are two versions of pad: Pad and PadV2. In PadV2 there is a second
// scalar input that provides the padding value. Therefore pad_value_ptr can be
@@ -46,8 +46,8 @@ inline void PadImpl(const tflite::PadParams& op_params,
TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
// Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
// pad them to 4 dims (yes, we are "padding the padding").
// Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
// pad them to 5 dims (yes, we are "padding the padding").
int left_padding_copy[PadKernelMaxDimensionCount()];
for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
left_padding_copy[i] = 0;
@@ -67,39 +67,46 @@ inline void PadImpl(const tflite::PadParams& op_params,
}
const int output_batch = ext_output_shape.Dims(0);
const int output_height = ext_output_shape.Dims(1);
const int output_width = ext_output_shape.Dims(2);
const int output_depth = ext_output_shape.Dims(3);
const int output_plane = ext_output_shape.Dims(1);
const int output_height = ext_output_shape.Dims(2);
const int output_width = ext_output_shape.Dims(3);
const int output_depth = ext_output_shape.Dims(4);
const int left_b_padding = left_padding_copy[0];
const int left_h_padding = left_padding_copy[1];
const int left_w_padding = left_padding_copy[2];
const int left_d_padding = left_padding_copy[3];
const int left_p_padding = left_padding_copy[1];
const int left_h_padding = left_padding_copy[2];
const int left_w_padding = left_padding_copy[3];
const int left_d_padding = left_padding_copy[4];
const int right_b_padding = right_padding_copy[0];
const int right_h_padding = right_padding_copy[1];
const int right_w_padding = right_padding_copy[2];
const int right_d_padding = right_padding_copy[3];
const int right_p_padding = right_padding_copy[1];
const int right_h_padding = right_padding_copy[2];
const int right_w_padding = right_padding_copy[3];
const int right_d_padding = right_padding_copy[4];
const T pad_value = *pad_value_ptr;
const T* in_ptr = input_data;
T* out_ptr = output_data;
for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_h = 0; out_h < output_height; ++out_h) {
for (int out_w = 0; out_w < output_width; ++out_w) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
if (out_b < left_b_padding ||
out_b >= output_batch - right_b_padding ||
out_h < left_h_padding ||
out_h >= output_height - right_h_padding ||
out_w < left_w_padding ||
out_w >= output_width - right_w_padding ||
out_d < left_d_padding ||
out_d >= output_depth - right_d_padding) {
*out_ptr++ = pad_value;
} else {
*out_ptr++ = *in_ptr++;
for (int out_p = 0; out_p < output_plane; ++out_p) {
for (int out_h = 0; out_h < output_height; ++out_h) {
for (int out_w = 0; out_w < output_width; ++out_w) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
if (out_b < left_b_padding ||
out_b >= output_batch - right_b_padding ||
out_p < left_p_padding ||
out_p >= output_plane - right_p_padding ||
out_h < left_h_padding ||
out_h >= output_height - right_h_padding ||
out_w < left_w_padding ||
out_w >= output_width - right_w_padding ||
out_d < left_d_padding ||
out_d >= output_depth - right_d_padding) {
*out_ptr++ = pad_value;
} else {
*out_ptr++ = *in_ptr++;
}
}
}
}

View File

@@ -23,7 +23,7 @@ limitations under the License.
namespace tflite {
namespace reference_ops {
inline void AveragePool(const PoolParams& params,
inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
@@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params,
filter_count++;
}
}
if (filter_count == 0) return false;
const float average = total / filter_count;
output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
ActivationFunctionWithMinMax(average, params.float_activation_min,
@@ -74,9 +75,10 @@ inline void AveragePool(const PoolParams& params,
}
}
}
return true;
}
inline void AveragePool(const PoolParams& params,
inline bool AveragePool(const PoolParams& params,
const RuntimeShape& input_shape,
const uint8_t* input_data,
const RuntimeShape& output_shape,
@@ -122,6 +124,7 @@ inline void AveragePool(const PoolParams& params,
filter_count++;
}
}
if (filter_count == 0) return false;
acc = (acc + filter_count / 2) / filter_count;
acc = std::max(acc, params.quantized_activation_min);
acc = std::min(acc, params.quantized_activation_max);
@@ -131,6 +134,7 @@ inline void AveragePool(const PoolParams& params,
}
}
}
return true;
}
inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,

View File

@@ -0,0 +1,774 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <utility>
#include "fixedpoint/fixedpoint.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif
namespace tflite {
namespace tensor_utils {
namespace {
const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
} // namespace
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min_value,
float* max_value, float* scaling_factor) {
auto minmax = std::minmax_element(values, values + size);
*min_value = *minmax.first;
*max_value = *minmax.second;
PortableSymmetricQuantizeFloats(values, size, quantized_values, *min_value,
*max_value, scaling_factor);
}
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor) {
const int32_t kScale = 127;
const float range = std::max(std::abs(min_value), std::abs(max_value));
if (range == 0) {
memset(quantized_values, 0, size * sizeof(int8_t));
*scaling_factor = 1;
return;
}
*scaling_factor = range / kScale;
const float scaling_factor_inv = kScale / range;
for (int i = 0; i < size; ++i) {
const int32_t quantized_value =
static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
// Clamp: just in case some odd numeric offset.
quantized_values[i] = static_cast<int8_t>(
std::min(kScale, std::max(-kScale, quantized_value)));
}
}
void PortableAsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values,
float* scaling_factor, int32_t* offset) {
const int32_t kMinScale = -128;
const int32_t kMaxScale = 127;
const double qmin_double = kMinScale;
const double qmax_double = kMaxScale;
const auto minmax = std::minmax_element(values, values + size);
const double rmin = std::fmin(0, *minmax.first);
const double rmax = std::fmax(0, *minmax.second);
if (rmin == rmax) {
memset(quantized_values, 0, size * sizeof(int8_t));
*scaling_factor = 1;
*offset = 0;
return;
} else {
double scale = (rmax - rmin) / (qmax_double - qmin_double);
const double zero_point_from_min = qmin_double - rmin / scale;
const double zero_point_from_max = qmax_double - rmax / scale;
const double zero_point_from_min_error =
std::abs(qmin_double) + std::abs(rmin / scale);
const double zero_point_from_max_error =
std::abs(qmax_double) + std::abs(rmax / scale);
const double zero_point_double =
zero_point_from_min_error < zero_point_from_max_error
? zero_point_from_min
: zero_point_from_max;
int8_t nudged_zero_point = 0;
if (zero_point_double <= qmin_double) {
nudged_zero_point = kMinScale;
} else if (zero_point_double >= qmax_double) {
nudged_zero_point = kMaxScale;
} else {
nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
}
*scaling_factor = scale;
*offset = nudged_zero_point;
}
const float scaling_factor_inv = 1.0f / *scaling_factor;
for (int i = 0; i < size; ++i) {
const int32_t quantized_value = static_cast<int32_t>(
TfLiteRound(*offset + values[i] * scaling_factor_inv));
quantized_values[i] =
std::min(kMaxScale, std::max(kMinScale, quantized_value));
}
}
void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
int m_rows, int m_cols,
const float* vector,
int n_batch, float* result) {
float* result_in_batch = result;
for (int b = 0; b < n_batch; b++) {
const float* matrix_ptr = matrix;
for (int r = 0; r < m_rows; r++) {
float dot_prod = 0.0f;
const float* vector_in_batch = vector + b * m_cols;
for (int c = 0; c < m_cols; c++) {
dot_prod += *matrix_ptr++ * *vector_in_batch++;
}
*result_in_batch += dot_prod;
++result_in_batch;
}
}
}
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result) {
for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
const float batch_scaling_factor = scaling_factors[batch];
// Get the address of the first row.
const int8_t* row_ptr = matrix;
for (int row = 0; row < m_rows; ++row) {
// Initialize the dot product sum for the row to 0.
int32_t dotprod = 0;
#if defined(__GNUC__)
// Prefetch the row to cache.
__builtin_prefetch(row_ptr, 0 /* prefetch for read */,
3 /* temporal locality */);
#endif
for (int col = 0; col < m_cols; ++col, ++row_ptr) {
dotprod += (*row_ptr) * (vectors[col]);
} // for col
*result += dotprod * batch_scaling_factor;
++result;
} // for row
} // for batch
}
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result, const float* per_channel_scale,
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
bool* compute_row_sums, CpuBackendContext* context) {
if (input_offset == nullptr) {
PortableMatrixBatchVectorMultiplyAccumulate(
matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result);
return;
}
if (!compute_row_sums || *compute_row_sums) {
PortableReductionSumVector(matrix, row_sums, m_rows, m_cols);
if (compute_row_sums) {
*compute_row_sums = false;
}
}
for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
const float batch_scaling_factor = scaling_factors[batch];
const int32_t batch_offset = input_offset[batch];
const int8_t* row_ptr = matrix;
for (int row = 0; row < m_rows; ++row) {
int32_t dotprod = 0;
float scale = batch_scaling_factor;
if (per_channel_scale) {
scale *= per_channel_scale[row];
}
#if defined(__GNUC__)
// Prefetch the row to cache.
__builtin_prefetch(row_ptr, 0 /* prefetch for read */,
3 /* temporal locality */);
#endif
for (int col = 0; col < m_cols; ++col, ++row_ptr) {
dotprod += (*row_ptr) * vectors[col];
} // for col
dotprod -= row_sums[row] * batch_offset;
*result += dotprod * scale;
++result;
} // for row
} // for batch
}
void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
const int kBlockSize = 4;
TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
for (int batch = 0; batch < n_batch; batch++) {
const float* matrix_ptr = matrix;
for (int row = 0; row < m_rows; row++) {
float dot_prod = 0.0f;
const float* vector_in_batch = vector + batch * m_cols;
for (int i = segments[row]; i < segments[row + 1]; i++) {
const int block_start_index = indices[i] * kBlockSize;
const float* vector_block_in_batch_ptr =
vector_in_batch + block_start_index;
for (int c = 0; c < kBlockSize; c++) {
dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
}
}
result[batch * m_rows + row] += dot_prod;
}
}
}
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result) {
const int kBlockSize = 16;
TFLITE_DCHECK_EQ( // NOLINT
m_cols % kBlockSize, 0);
for (int batch = 0; batch < n_batch; batch++) {
const float* matrix_ptr = matrix;
const uint8_t* ledger_ptr = ledger;
for (int row = 0; row < m_rows; row++) {
float dot_prod = 0.0f;
int num_nonzero_blocks = *ledger_ptr++;
if (num_nonzero_blocks > 0) {
const float* vector_in_batch = vector + batch * m_cols;
for (int i = 0; i < num_nonzero_blocks; i++) {
const int block_start_index = *ledger_ptr++ * kBlockSize;
const float* vector_block_in_batch_ptr =
vector_in_batch + block_start_index;
for (int c = 0; c < kBlockSize; c++) {
dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
}
}
}
result[batch * m_rows + row] += dot_prod;
}
}
}
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
const int m_cols, const int8_t* __restrict__ vectors,
const float* scaling_factors, int n_batch, float* __restrict__ result) {
static const int kBlockSize = 16;
TFLITE_DCHECK_EQ( // NOLINT
m_cols % kBlockSize, 0);
for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
const float batch_scaling_factor = scaling_factors[batch];
const uint8_t* ledger_ptr = ledger;
// Get the address of the first row.
const int8_t* row_ptr = matrix;
for (int row = 0; row < m_rows; ++row) {
// Initialize the dot product sum for the row to 0.
int32_t dotprod = 0;
#if defined(__GNUC__)
// Prefetch the row to cache.
__builtin_prefetch(row_ptr, 0 /* prefetch for read */,
3 /* temporal locality */);
#endif
int num_nonzero_blocks = *ledger_ptr++;
for (int i = 0; i < num_nonzero_blocks; i++) {
const int block_start_index = *ledger_ptr++ * kBlockSize;
const int8_t* vector_block_ptr = vectors + block_start_index;
for (int c = 0; c < kBlockSize; c++) {
dotprod += (*row_ptr++) * (*vector_block_ptr++);
} // for block
} // for num_nonzero_blocks
result[batch * m_rows + row] += dotprod * batch_scaling_factor;
} // for row
} // for batch
}
template <typename T>
void PortableMatrixBatchVectorMultiplyAccumulateImpl(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
T* output) {
const int16_t output_max = std::numeric_limits<T>::max();
const int16_t output_min = std::numeric_limits<T>::min();
for (int batch = 0; batch < n_batch; ++batch) {
for (int row = 0; row < n_output; ++row) {
int32_t acc = bias[row];
for (int col = 0; col < n_input; ++col) {
int8_t input_val = input[batch * n_input + col];
int8_t weights_val = input_to_gate_weights[row * n_input + col];
acc += input_val * weights_val;
}
acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
acc += output_zp;
acc += output[batch * n_output + row];
if (acc > output_max) {
acc = output_max;
}
if (acc < output_min) {
acc = output_min;
}
output[batch * n_output + row] = static_cast<T>(acc);
}
}
}
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int16_t* output, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulateImpl(
input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
n_output, output_zp, output);
}
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int8_t* output, CpuBackendContext* context) {
PortableMatrixBatchVectorMultiplyAccumulateImpl(
input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
n_output, output_zp, output);
}
void PortableMatrixBatchVectorMultiply(const int8_t* input,
int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input,
int32_t n_cell, int8_t* gate_output,
int8_t gate_output_zp) {
const int32_t int8_max = std::numeric_limits<int8_t>::max();
const int32_t int8_min = std::numeric_limits<int8_t>::min();
for (int batch = 0; batch < n_batch; ++batch) {
for (int row = 0; row < n_cell; ++row) {
int32_t acc = 0;
for (int col = 0; col < n_input; ++col) {
int32_t input_val = input[batch * n_input + col];
int8_t weights_val = input_to_gate_weights[row * n_input + col];
acc += (input_val - input_zeropoint) * weights_val;
}
acc = MultiplyByQuantizedMultiplier(acc, input_to_gate_effective_scale_a,
input_to_gate_effective_scale_b);
acc += gate_output_zp;
if (acc > int8_max) {
acc = int8_max;
}
if (acc < int8_min) {
acc = int8_min;
}
gate_output[batch * n_cell + row] = static_cast<int8_t>(acc);
}
}
}
void PortableMatrixBatchVectorMultiply(
const int16_t* hidden, const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
int32_t n_output, int32_t output_zp, int8_t* proj_output) {
const int16_t int8_max = std::numeric_limits<int8_t>::max();
const int16_t int8_min = std::numeric_limits<int8_t>::min();
for (int batch = 0; batch < n_batch; ++batch) {
for (int row = 0; row < n_output; ++row) {
int64_t acc = gate_bias[row];
for (int col = 0; col < n_hidden; ++col) {
int16_t input_val = hidden[batch * n_hidden + col];
int8_t weights_val = hidden_to_output_weights[row * n_hidden + col];
int64_t curr = acc;
acc += input_val * weights_val;
if (input_val * weights_val > 0 && acc < curr) {
acc = std::numeric_limits<int32_t>::max();
}
if (input_val * weights_val < 0 && acc > curr) {
acc = std::numeric_limits<int32_t>::min();
}
}
acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a,
proj_effective_scale_b);
acc += output_zp;
if (acc > int8_max) {
acc = int8_max;
}
if (acc < int8_min) {
acc = int8_min;
}
proj_output[batch * n_output + row] = acc;
}
}
}
void PortableApplyLayerNorm(const int16_t* input,
const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output) {
// The square of std::pow(2, 10), which is the extra factor that makes sure
// normalized values has enough resolution.
static const int kTwoToPower20 = 1 << 20;
for (int i = 0; i < n_batch; ++i) {
int64_t sum = 0;
int64_t sum_sq = 0;
for (int j = 0; j < n_input; ++j) {
const int32_t index = i * n_input + j;
int32_t val = static_cast<int32_t>(input[index]);
sum += val;
sum_sq += val * val;
}
int32_t mean =
static_cast<int32_t>(static_cast<int64_t>(sum) * 1024 / n_input);
// TODO(b/173994730): Avoids overflow but only works for POT n_input.
int32_t temp = kTwoToPower20 / n_input;
int64_t variance =
sum_sq * temp - static_cast<int64_t>(mean) * static_cast<int64_t>(mean);
int32_t variance2 = static_cast<int32_t>(variance / kTwoToPower20);
if (variance2 < 1) {
variance2 = variance_limit;
}
int32_t stddev_inverse_a;
int stddev_inverse_b;
GetInvSqrtQuantizedMultiplierExp(variance2, /*reverse_shift*/ -1,
&stddev_inverse_a, &stddev_inverse_b);
for (int j = 0; j < n_input; ++j) {
const int32_t index = i * n_input + j;
int32_t val = static_cast<int32_t>(input[index]);
int32_t shifted = 1024 * val - mean;
int32_t rescaled = MultiplyByQuantizedMultiplier(
shifted, stddev_inverse_a, stddev_inverse_b);
// TODO(jianlijianli): Saturate this.
int64_t val3 = rescaled * layer_norm_weights[j] + bias[j];
int32_t val4 =
static_cast<int32_t>((val3 > 0 ? val3 + 512 : val3 - 512) / 1024);
int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a,
layer_norm_scale_b + 12);
val5 = std::min(std::max(kInt16Min, val5), kInt16Max);
output[index] = static_cast<int16_t>(val5);
}
}
}
void PortableApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b,
const int32_t* bias, int n_batch, int n_input,
int16_t* output) {
const int32_t int16_max = std::numeric_limits<int16_t>::max();
const int32_t int16_min = std::numeric_limits<int16_t>::min();
const float layer_norm_scale =
layer_norm_scale_a *
std::pow(2.0, static_cast<double>(layer_norm_scale_b - 31));
const float bias_scale =
static_cast<float>(std::pow(2.0, -10)) * layer_norm_scale;
for (int batch = 0; batch < n_batch; ++batch) {
float sum = 0.0f;
float sum_sq = 0.0f;
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const float value = static_cast<float>(input[index]);
sum += value;
sum_sq += value * value;
}
const float mean = sum / n_input;
float stddev_inv = 0.0f;
const float variance = sum_sq / n_input - mean * mean;
if (variance == 0) {
stddev_inv = 1.0f / std::sqrt(1e-8f);
} else {
stddev_inv = 1.0f / std::sqrt(variance);
}
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const float normalized_value =
(static_cast<float>(input[index]) - mean) * stddev_inv;
const float weighted_normalized_value =
normalized_value * layer_norm_weights[i] * layer_norm_scale +
bias[i] * bias_scale;
const int32_t quant_output = static_cast<int32_t>(std::round(
weighted_normalized_value * static_cast<float>(std::pow(2, 12))));
output[index] = std::min(int16_max, std::max(int16_min, quant_output));
}
}
}
void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
int32_t scalar, int32_t n_row,
int32_t n_col, int32_t* output) {
for (int i = 0; i < n_row; ++i) {
int32_t row_sum = 0;
for (int j = 0; j < n_col; ++j) {
row_sum += *matrix++;
}
output[i] += row_sum * scalar;
}
}
void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output) {
for (int batch = 0; batch < n_batch; ++batch) {
for (int c = 0; c < n_input; c++) {
using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
const int index = batch * n_input + c;
F3 sigmoid_input = F3::FromRaw(input[index]);
F0 sigmoid_output = gemmlowp::logistic(sigmoid_input);
output[index] = sigmoid_output.raw();
}
}
}
void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output) {
const int32_t int16_max = std::numeric_limits<int16_t>::max();
const int32_t int16_min = std::numeric_limits<int16_t>::min();
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const float float_input =
input[index] * static_cast<float>(std::pow(2, -12));
const float float_output = 1.0f / (1.0f + std::exp(-float_input));
const int32_t quant_output = static_cast<int32_t>(
float_output * static_cast<float>(std::pow(2, 15)));
const int32_t quant_output_clamped =
std::min(int16_max, std::max(int16_min, quant_output));
output[index] = static_cast<int16_t>(quant_output_clamped);
}
}
}
template <int IntegerBits>
void PortableApplyTanhImpl(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output) {
using FX = gemmlowp::FixedPoint<std::int16_t, IntegerBits>;
using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
FX tanh_input = FX::FromRaw(input[index]);
F0 tanh_output = gemmlowp::tanh(tanh_input);
output[index] = tanh_output.raw();
}
}
}
void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
int32_t n_batch, int32_t n_input, int16_t* output) {
assert(integer_bits <= 6);
#define DISPATCH_TANH(i) \
case i: \
PortableApplyTanhImpl<i>(input, n_batch, n_input, output); \
break;
switch (integer_bits) {
DISPATCH_TANH(0);
DISPATCH_TANH(1);
DISPATCH_TANH(2);
DISPATCH_TANH(3);
DISPATCH_TANH(4);
DISPATCH_TANH(5);
DISPATCH_TANH(6);
default:
return;
}
#undef DISPATCH_TANH
}
void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int32_t integer_bits,
int16_t* output) {
const int32_t int16_max = std::numeric_limits<int16_t>::max();
const int32_t int16_min = std::numeric_limits<int16_t>::min();
const double two = 2.0;
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const float float_input =
input[index] * std::pow(two, static_cast<double>(integer_bits));
const float float_output = std::tanh(float_input);
const int32_t quant_output = static_cast<int32_t>(
float_output * static_cast<float>(std::pow(2, 15)));
const int32_t quant_output_clamped =
std::min(int16_max, std::max(int16_min, quant_output));
output[index] = static_cast<int16_t>(quant_output_clamped);
}
}
}
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int shift, int16_t* output) {
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const int16_t a = input_1[index];
const int16_t b = input_2[index];
const int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
output[index] =
static_cast<int16_t>(gemmlowp::RoundingDivideByPOT(value, shift));
}
}
}
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output) {
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
const int16_t a = input_1[index];
const int16_t b = input_2[index];
int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
value = MultiplyByQuantizedMultiplier(value, multiplier, shift);
value -= output_zp;
value = std::min(std::max(static_cast<int32_t>(-128), value),
static_cast<int32_t>(127));
output[index] = static_cast<int8_t>(value);
}
}
}
void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int16_t* output) {
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
int32_t sum = input_1[index] + input_2[index];
const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum));
output[index] = static_cast<int16_t>(sum_clamped);
}
}
}
float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
int v_size) {
float result = 0.0;
for (int v = 0; v < v_size; v++) {
result += *vector1++ * *vector2++;
}
return result;
}
namespace {
inline int32_t VectorVectorDotProduct(const int16_t* vector1,
const int16_t* vector2, int v_size) {
int32_t result = 0;
for (int v = 0; v < v_size; v++) {
result += *vector1++ * *vector2++;
}
return result;
}
} // namespace
void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
const int16_t* vector2,
int v_size, int n_batch,
int32_t* result) {
for (int b = 0; b < n_batch; b++) {
result[b] = VectorVectorDotProduct(vector1, vector2, v_size);
vector1 += v_size;
vector2 += v_size;
}
}
void PortableVectorBatchVectorCwiseProductAccumulate(
const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
int32_t multiplier, int shift, int16_t* result) {
for (int b = 0; b < n_batch; b++) {
for (int v = 0; v < v_size; v++) {
int32_t prod = vector[v] * *batch_vector++;
prod = MultiplyByQuantizedMultiplier(prod, multiplier, shift);
int32_t output = prod + *result;
output = std::max(std::min(static_cast<int32_t>(32767), output),
static_cast<int32_t>(-32768));
*result++ = output;
}
}
}
void PortableSub1Vector(const float* vector, int v_size, float* result) {
for (int v = 0; v < v_size; v++) {
*result++ = 1.0f - *vector++;
}
}
void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result) {
static const int16_t kOne = 32767;
for (int v = 0; v < v_size; v++) {
*result++ = kOne - *vector++;
}
}
void PortableVectorScalarMultiply(const int8_t* vector, const int v_size,
const float scale, float* result) {
for (int v = 0; v < v_size; ++v) {
*result++ = scale * *vector++;
}
}
void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
float* __restrict__ output_vector,
int v_size, int n_batch) {
for (int batch = 0; batch < n_batch; ++batch) {
float sum = 0.0f;
for (int i = 0; i < v_size; ++i) {
sum += input_vector[i];
}
const float mean = sum / v_size;
float sum_diff_sq = 0.0f;
for (int i = 0; i < v_size; ++i) {
const float diff = input_vector[i] - mean;
sum_diff_sq += diff * diff;
}
const float variance = sum_diff_sq / v_size;
constexpr float kNormalizationConstant = 1e-8f;
const float stddev_inv =
1.0f / std::sqrt(variance + kNormalizationConstant);
for (int i = 0; i < v_size; ++i) {
output_vector[i] = (input_vector[i] - mean) * stddev_inv;
}
input_vector += v_size;
output_vector += v_size;
}
}
void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b,
int32_t n_batch, int32_t n_cell,
int16_t* output) {
const int32_t int16_max = std::numeric_limits<int16_t>::max();
const int32_t int16_min = std::numeric_limits<int16_t>::min();
for (int i = 0; i < n_batch * n_cell; ++i) {
int32_t x = static_cast<int32_t>(input[i]) - static_cast<int32_t>(input_zp);
int32_t h =
static_cast<int32_t>(recurrent[i]) - static_cast<int32_t>(recurrent_zp);
int32_t x_scaled = MultiplyByQuantizedMultiplier(x, input_effective_scale_a,
input_effective_scale_b);
int32_t h_scaled = MultiplyByQuantizedMultiplier(
h, recurrent_effective_scale_a, recurrent_effective_scale_b);
int32_t y = h_scaled + x_scaled;
if (y > int16_max) {
y = int16_max;
}
if (y < int16_min) {
y = int16_min;
}
output[i] = static_cast<int16_t>(y);
}
}
} // namespace tensor_utils
} // namespace tflite

View File

@@ -0,0 +1,235 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
#include <algorithm>
#include <cstdint>
#if defined(_MSC_VER)
#define __restrict__ __restrict
#endif
namespace tflite {
// Not all backends support CpuBackendContext usage, so forward declare to avoid
// pulling in its implementation.
class CpuBackendContext;
namespace tensor_utils {
template <typename T>
bool PortableIsZeroVector(const T* vector, int v_size) {
for (int i = 0; i < v_size; ++i) {
if (vector[i] != 0) {
return false;
}
}
return true;
}
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float* min_value,
float* max_value, float* scaling_factor);
void PortableSymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values, float min_value,
float max_value, float* scaling_factor);
void PortableAsymmetricQuantizeFloats(const float* values, const int size,
int8_t* quantized_values,
float* scaling_factor, int32_t* offset);
// Multiply a matrix by a batch vector, and store results in a batch-size
// vector.
void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
int m_rows, int m_cols,
const float* vector,
int n_batch, float* result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vectors, const float* scaling_factors,
int n_batch, float* __restrict__ result, const float* per_channel_scale,
const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
bool* compute_row_sums, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
const int8_t* __restrict__ vector, const float* scaling_factors,
int n_batch, int32_t* scratch, float* __restrict__ result,
CpuBackendContext* context);
void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
const float* __restrict__ matrix, const int32_t* __restrict__ segments,
const int32_t* __restrict__ indices, int m_rows, int m_cols,
const float* __restrict__ vector, int n_batch, float* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
float* __restrict__ result);
void PortableSparseMatrixBatchVectorMultiplyAccumulate(
const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
const int m_cols, const int8_t* __restrict__ vectors,
const float* scaling_factors, int n_batch, float* __restrict__ result);
// Dot product of two vectors.
float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
int v_size);
void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
const int16_t* vector2,
int v_size, int n_batch,
int32_t* result);
void PortableVectorBatchVectorCwiseProductAccumulate(
const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
int32_t multiplier, int shift, int16_t* result);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int16_t* output, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiplyAccumulate(
const int8_t* input, const int32_t* bias,
const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
int32_t* scratch, int8_t* output, CpuBackendContext* context);
void PortableMatrixBatchVectorMultiply(const int8_t* input,
int32_t input_zeropoint,
const int8_t* input_to_gate_weights,
int32_t input_to_gate_effective_scale_a,
int32_t input_to_gate_effective_scale_b,
int32_t n_batch, int32_t n_input,
int32_t n_cell, int8_t* gate_output,
int8_t gate_output_zp);
void PortableMatrixBatchVectorMultiply(
const int16_t* hidden, const int8_t* hidden_to_output_weights,
int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
int32_t n_output, int32_t output_zp, int8_t* proj_output);
void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
int32_t scalar, int32_t n_row,
int32_t n_col, int32_t* output);
void PortableApplyLayerNorm(const int16_t* input,
const int16_t* layer_norm_weights,
const int32_t* bias, int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b, int32_t variance_limit,
int n_batch, int n_input, int16_t* output);
void PortableApplyLayerNormFloat(const int16_t* input,
const int16_t* layer_norm_weights,
int32_t layer_norm_scale_a,
int32_t layer_norm_scale_b,
const int32_t* bias, int n_batch, int n_input,
int16_t* output);
void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output);
void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int16_t* output);
void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
int32_t n_batch, int32_t n_input, int16_t* output);
void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
int32_t n_input, int32_t integer_bits,
int16_t* output);
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int shift, int16_t* output);
void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
int32_t multiplier, int32_t shift, int32_t n_batch,
int32_t n_input, int32_t output_zp, int8_t* output);
void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
int n_batch, int n_input, int16_t* output);
template <typename T>
void PortableCwiseClipping(T* vector, const int v_size,
const T& clipping_value) {
for (int i = 0; i < v_size; i++) {
vector[i] = std::max(std::min(clipping_value, vector[i]),
static_cast<T>(-clipping_value));
}
}
// Batch vector initialization with another vector.
void PortableVectorBatchVectorAssign(const float* vector, int v_size,
int n_batch, float* batch_vector);
// Compute "1.0f - elements of vector" (used in CIFG).
void PortableSub1Vector(const float* vector, int v_size, float* result);
void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result);
// Multiply all elements of vector with a scalar.
void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
float* result);
// Reduce-sum on a vector:
// input_vector: pointer to input vector.
// output_vector: pointer to vector.
// output_size: output vector size.
// reduction_size: number of consecutive elements from input vector which are
// added to get one element of output.
template <typename INPUT, typename OUTPUT>
void PortableReductionSumVector(const INPUT* input_vector,
OUTPUT* output_vector, int output_size,
int reduction_size) {
for (int o = 0; o < output_size; o++) {
OUTPUT result = 0;
for (int r = 0; r < reduction_size; r++) {
result += input_vector[r];
}
output_vector[o] = result;
input_vector += reduction_size;
}
}
// Layer norm for each batch.
void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
float* __restrict__ output_vector,
int v_size, int n_batch);
// Saturate Add.
void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
const int8_t* recurrent, int8_t recurrent_zp,
int32_t input_effective_scale_a,
int32_t input_effective_scale_b,
int32_t recurrent_effective_scale_a,
int32_t recurrent_effective_scale_b,
int32_t n_batch, int32_t n_cell,
int16_t* output);
} // namespace tensor_utils
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_

View File

@@ -23,6 +23,25 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
// Check if the reduction at index is the first one along the dimensions given
// in axis.
inline bool IsFirstReduction(const int* index, const int num_axis,
const int* axis) {
if (num_axis == 0) {
return true;
}
TFLITE_DCHECK(index != nullptr);
TFLITE_DCHECK(axis != nullptr);
for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
if (index[axis[axis_idx]] != 0) {
return false;
}
}
return true;
}
namespace tflite {
namespace reference_ops {
@@ -35,8 +54,7 @@ inline bool Reduce(const In* input_data, const int* input_dims,
const int* output_dims, const int input_num_dims,
const int output_num_dims, const int* axis,
const int num_axis, int* input_iter,
Out reducer(const Out current, const In in),
Out* output_data) {
Out reducer(Out current, const In in), Out* output_data) {
// Reset input iterator.
for (int idx = 0; idx < input_num_dims; ++idx) {
input_iter[idx] = 0;
@@ -53,6 +71,37 @@ inline bool Reduce(const In* input_data, const int* input_dims,
return true;
}
// Similar to above Reduce function but takes two reducer functions.
// The 'reducer_first' is called with the first value of the reduction,
// 'reducer_next' is then called for all the others.
template <typename In, typename Out>
inline bool Reduce(const In* input_data, const int* input_dims,
const int* output_dims, const int input_num_dims,
const int output_num_dims, const int* axis,
const int num_axis, int* input_iter,
const std::function<Out(In in)>& reducer_first,
const std::function<Out(Out current, In in)>& reducer_next,
Out* output_data) {
// Reset input iterator.
for (int idx = 0; idx < input_num_dims; ++idx) {
input_iter[idx] = 0;
}
// Iterate through input_data.
do {
size_t input_offset =
ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
input_iter, num_axis, axis);
if (IsFirstReduction(input_iter, num_axis, axis)) {
output_data[output_offset] = reducer_first(input_data[input_offset]);
} else {
output_data[output_offset] =
reducer_next(output_data[output_offset], input_data[input_offset]);
}
} while (NextIndex(input_num_dims, input_dims, input_iter));
return true;
}
// This method parses the input 'axis' to remove duplicates and handle negative
// values, and returns a valid 'out_axis'
inline bool ResolveAxis(const int num_dims, const int* axis,
@@ -111,7 +160,8 @@ inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
for (int idx = 0; idx < num_dims; ++idx) {
size_t current = static_cast<size_t>(dims[idx]);
// Overflow prevention.
if (num_elements > std::numeric_limits<size_t>::max() / current) {
if (current > 0 &&
num_elements > std::numeric_limits<size_t>::max() / current) {
return false;
}
num_elements *= current;
@@ -132,17 +182,20 @@ inline bool ReduceGeneric(const T* input_data, const int* input_dims,
bool keep_dims, int* temp_index, int* resolved_axis,
T init_value,
T reducer(const T current, const T in)) {
// Return early when input shape has zero dim.
for (int i = 0; i < input_num_dims; ++i) {
if (input_dims[i] == 0) return true;
}
// Reset output data.
if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
output_data)) {
return false;
}
// Return early when input shape has zero dim. This is done after initializing
// data for output tensor because there are cases that the input tensor is
// empty but output tensor is not. In that case, output tensor should be
// filled with init_value.
for (int i = 0; i < input_num_dims; ++i) {
if (input_dims[i] == 0) return true;
}
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
@@ -290,9 +343,9 @@ inline void Mean(const tflite::MeanParams& op_params,
constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
int32_t bias =
output_zero_point -
static_cast<int32_t>(input_zero_point * input_scale / output_scale);
float temp = input_zero_point * input_scale / output_scale;
temp = temp > 0 ? temp + 0.5f : temp - 0.5f;
int32_t bias = output_zero_point - static_cast<int32_t>(temp);
double real_scale =
static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
@@ -353,6 +406,14 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
temp_sum[idx] = U();
}
// Return early when input shape has zero dim. This is done after initializing
// data for output tensor because there are cases that the input tensor is
// empty but output tensor is not. In that case, output tensor should be
// filled with init_value.
for (int i = 0; i < input_num_dims; ++i) {
if (input_dims[i] == 0) return true;
}
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
@@ -405,6 +466,57 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
return true;
}
template <typename T>
inline bool QuantizedReduceProd(const T* input_data, int32_t input_zero_point,
const RuntimeShape& input_shape, T* output_data,
int32_t output_zero_point,
const RuntimeShape& output_shape,
const int* axis,
const int64_t num_axis_dimensions,
bool keep_dims, int* temp_index,
int* resolved_axis, int32_t* temp_prod,
int32_t scaling_multiplier, int scaling_shift) {
const int32_t kMinValue = std::numeric_limits<T>::min();
const int32_t kMaxValue = std::numeric_limits<T>::max();
// Resolve axis.
int num_resolved_axis = 0;
if (!ResolveAxis(input_shape.DimensionsCount(), axis, num_axis_dimensions,
resolved_axis, &num_resolved_axis)) {
return false;
}
// Calculate the reduced product by rescaling each multiplication step to
// avoid an overflow.
auto reducer_first = [&](T in) -> int32_t { return in - input_zero_point; };
auto reducer_next = [&](int32_t current, T in) -> int32_t {
const int64_t result =
static_cast<int64_t>(current) * (in - input_zero_point);
return MultiplyByQuantizedMultiplier(result, scaling_multiplier,
scaling_shift);
};
if (!Reduce<T, int32_t>(
input_data, input_shape.DimsData(), output_shape.DimsData(),
input_shape.DimensionsCount(), output_shape.DimensionsCount(),
resolved_axis, num_resolved_axis, temp_index, reducer_first,
reducer_next, temp_prod)) {
return false;
}
for (int i = 0; i < output_shape.FlatSize(); i++) {
int32_t result =
MultiplyByQuantizedMultiplier(static_cast<int64_t>(temp_prod[i]),
scaling_multiplier, scaling_shift) +
output_zero_point;
result = std::min(std::max(result, kMinValue), kMaxValue);
output_data[i] = static_cast<T>(result);
}
return true;
}
} // namespace reference_ops
} // namespace tflite

View File

@@ -0,0 +1,228 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
inline void ComputeInterpolationValues(const float value, const float scale,
const bool half_pixel_centers,
int32_t input_size, float* scaled_value,
int32_t* lower_bound,
int32_t* upper_bound) {
if (half_pixel_centers) {
*scaled_value = (value + 0.5f) * scale - 0.5f;
} else {
*scaled_value = value * scale;
}
float scaled_value_floor = std::floor(*scaled_value);
*lower_bound = std::max(static_cast<int32_t>(scaled_value_floor),
static_cast<int32_t>(0));
*upper_bound =
std::min(static_cast<int32_t>(std::ceil(*scaled_value)), input_size - 1);
}
template <typename T>
inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_size_shape,
const int32_t* output_size_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
// If half_pixel_centers is True, align_corners must be False.
TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners);
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_size_shape =
RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
int32_t input_height = input_shape.Dims(1);
int32_t input_width = input_shape.Dims(2);
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2);
int32_t output_height =
output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
int32_t output_width =
output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];
float height_scale = static_cast<float>(input_height) / output_height;
float width_scale = static_cast<float>(input_width) / output_width;
if (op_params.align_corners && output_height > 1) {
height_scale = static_cast<float>(input_height - 1) / (output_height - 1);
}
if (op_params.align_corners && output_width > 1) {
width_scale = static_cast<float>(input_width - 1) / (output_width - 1);
}
const float rounding_offset = std::numeric_limits<T>::is_integer ? .5f : .0f;
for (int b = 0; b < batches; ++b) {
for (int y = 0; y < output_height; ++y) {
float input_y;
int32_t y0, y1;
ComputeInterpolationValues(y, height_scale, op_params.half_pixel_centers,
input_height, &input_y, &y0, &y1);
for (int x = 0; x < output_width; ++x) {
float input_x;
int32_t x0, x1;
ComputeInterpolationValues(x, width_scale, op_params.half_pixel_centers,
input_width, &input_x, &x0, &x1);
for (int c = 0; c < depth; ++c) {
T interpolation =
static_cast<T>(input_data[Offset(input_shape, b, y0, x0, c)] *
(1 - (input_y - y0)) * (1 - (input_x - x0)) +
input_data[Offset(input_shape, b, y1, x0, c)] *
(input_y - y0) * (1 - (input_x - x0)) +
input_data[Offset(input_shape, b, y0, x1, c)] *
(1 - (input_y - y0)) * (input_x - x0) +
input_data[Offset(input_shape, b, y1, x1, c)] *
(input_y - y0) * (input_x - x0) +
rounding_offset);
output_data[Offset(output_shape, b, y, x, c)] = interpolation;
}
}
}
}
}
inline void ComputeInterpolationValuesInteger(
const int32_t value, const int32_t scale_10, const bool half_pixel_centers,
int32_t input_size, int32_t* scaled_value, int32_t* lower_bound,
int32_t* upper_bound) {
if (half_pixel_centers) {
*scaled_value = value * scale_10 + scale_10 / 2 - (1 << 9);
} else {
*scaled_value = value * scale_10;
}
constexpr int32_t zero = 0;
*lower_bound = std::max(*scaled_value / (1 << 10), zero);
*upper_bound =
std::min((*scaled_value + (1 << 10) - 1) / (1 << 10), input_size - 1);
}
// Same as above but doesn't use any floating-point for the resize
template <typename T>
inline void ResizeBilinearInteger(
const tflite::ResizeBilinearParams& op_params,
const RuntimeShape& unextended_input_shape, const T* input_data,
const RuntimeShape& unextended_output_size_shape,
const int32_t* output_size_data,
const RuntimeShape& unextended_output_shape, T* output_data) {
// If half_pixel_centers is True, align_corners must be False.
TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners);
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_size_shape =
RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
const int32_t input_height = input_shape.Dims(1);
const int32_t input_width = input_shape.Dims(2);
const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1);
TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2);
const int32_t output_height =
output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
const int32_t output_width =
output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];
int32_t height_scale_10 =
((1 << 10) * input_height + output_height / 2) / output_height;
int32_t width_scale_10 =
((1 << 10) * input_width + output_width / 2) / output_width;
if (op_params.align_corners && output_height > 1) {
height_scale_10 =
((1 << 10) * (input_height - 1) + (output_height - 1) / 2) /
(output_height - 1);
}
if (op_params.align_corners && output_width > 1) {
width_scale_10 = ((1 << 10) * (input_width - 1) + (output_width - 1) / 2) /
(output_width - 1);
}
for (int b = 0; b < batches; ++b) {
for (int y = 0; y < output_height; ++y) {
int32_t input_y, y0, y1;
ComputeInterpolationValuesInteger(y, height_scale_10,
op_params.half_pixel_centers,
input_height, &input_y, &y0, &y1);
for (int x = 0; x < output_width; ++x) {
int32_t input_x, x0, x1;
ComputeInterpolationValuesInteger(x, width_scale_10,
op_params.half_pixel_centers,
input_width, &input_x, &x0, &x1);
for (int c = 0; c < depth; ++c) {
const int64_t output_20_ll =
static_cast<int64_t>(
input_data[Offset(input_shape, b, y0, x0, c)]) *
((1 << 10) - (input_y - (1 << 10) * y0)) *
((1 << 10) - (input_x - (1 << 10) * x0));
const int64_t output_20_lu =
static_cast<int64_t>(
input_data[Offset(input_shape, b, y1, x0, c)]) *
(input_y - (1 << 10) * y0) *
((1 << 10) - (input_x - (1 << 10) * x0));
const int64_t output_20_rl =
static_cast<int64_t>(
input_data[Offset(input_shape, b, y0, x1, c)]) *
((1 << 10) - (input_y - (1 << 10) * y0)) *
(input_x - (1 << 10) * x0);
const int64_t output_20_ru =
static_cast<int64_t>(
input_data[Offset(input_shape, b, y1, x1, c)]) *
(input_y - (1 << 10) * y0) * (input_x - (1 << 10) * x0);
const int64_t output_20 =
output_20_ll + output_20_lu + output_20_rl + output_20_ru;
const int64_t round = (output_20 > 0) ? (1 << 19) : -(1 << 19);
const T interpolation =
static_cast<T>((output_20 + round) / (1 << 20));
output_data[Offset(output_shape, b, y, x, c)] = interpolation;
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_

View File

@@ -159,7 +159,7 @@ inline int16_t SoftMaxCalculateExp(const SoftmaxParams& params,
std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
static_cast<int32_t>(32767));
// apply the exp() LUT activation function
return generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
return lut_lookup(sat_sym_scaled_diff, params.exp_lut);
}
// Quantized softmax with int16_t input and int16_t output.
inline void SoftmaxInt16(const SoftmaxParams& params,
@@ -207,8 +207,8 @@ inline void SoftmaxInt16(const SoftmaxParams& params,
std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
static_cast<int32_t>(32767)));
// apply 1/(1 + x) LUT activation function
int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
int16_t reciprocal_scale_Q015 =
lut_lookup(sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
// Rescale the exp_result with reciprocal
// range of output is [0, 32767] correspond to [0.0, 1.0]

View File

@@ -0,0 +1,80 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
#include <cstdint>
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T>
inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
const RuntimeShape input_shape =
RuntimeShape::ExtendedShape(4, unextended_input_shape);
const RuntimeShape output_shape =
RuntimeShape::ExtendedShape(4, unextended_output_shape);
const int input_depth = input_shape.Dims(3);
const int input_width = input_shape.Dims(2);
const int input_height = input_shape.Dims(1);
const int input_batch = input_shape.Dims(0);
const int output_depth = output_shape.Dims(3);
const int output_width = output_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_batch = output_shape.Dims(0);
const int32_t block_size = op_params.block_size;
TFLITE_DCHECK_EQ(input_width, output_width * block_size);
TFLITE_DCHECK_EQ(input_height, output_height * block_size);
TFLITE_DCHECK_EQ(input_depth * block_size * block_size, output_depth);
TFLITE_DCHECK_EQ(input_batch, output_batch);
for (int in_b = 0; in_b < input_batch; ++in_b) {
for (int in_h = 0; in_h < input_height; ++in_h) {
for (int in_w = 0; in_w < input_width; ++in_w) {
for (int in_d = 0; in_d < input_depth; ++in_d) {
const int out_d =
in_d + ((in_h % block_size) * block_size + in_w % block_size) *
input_depth;
const int out_w = in_w / block_size;
const int out_h = in_h / block_size;
const int out_b = in_b;
const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
const int output_index =
Offset(output_shape, out_b, out_h, out_w, out_d);
output_data[output_index] = input_data[input_index];
}
}
}
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_

View File

@@ -0,0 +1,111 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace reference_ops {
template <typename T, int N>
void TransposeImpl(const TransposeParams& params,
const RuntimeShape& unextended_input_shape,
const T* input_data,
const RuntimeShape& unextended_output_shape,
T* output_data) {
const int unextended_input_size = unextended_input_shape.DimensionsCount();
const int unextended_output_size = unextended_output_shape.DimensionsCount();
TFLITE_DCHECK_LE(unextended_input_size, N);
TFLITE_DCHECK_LE(unextended_output_size, N);
TFLITE_DCHECK_EQ(unextended_output_size, params.perm_count);
const int input_ext_size = N - unextended_input_size;
const int output_ext_size = N - unextended_output_size;
NdArrayDesc<N> input_desc;
NdArrayDesc<N> output_desc;
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape),
&input_desc);
CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
&output_desc);
// The perm data is extended to match the output, each index incremented by
// the amount of front padding of the input shape.
int extended_perm[N];
for (int i = 0; i < N; ++i) {
extended_perm[i] = i < output_ext_size
? i
: params.perm[i - output_ext_size] + input_ext_size;
}
// Permutes the input shape so we don't need to permute the indexes inside
// the loop. Check to make sure output_dims is matching input_dims.
NdArrayDesc<N> perm_input_desc;
for (int k = 0; k < N; ++k) {
TFLITE_DCHECK_EQ(input_desc.extents[extended_perm[k]],
output_desc.extents[k]);
perm_input_desc.extents[k] = input_desc.extents[extended_perm[k]];
perm_input_desc.strides[k] = input_desc.strides[extended_perm[k]];
}
// Naive transpose loop (iterate on output index and compute input index).
auto tranpose_func = [&](int indexes[N]) {
output_data[SubscriptToIndex(output_desc, indexes)] =
input_data[SubscriptToIndex(perm_input_desc, indexes)];
};
NDOpsHelper<N>(output_desc, tranpose_func);
}
template <typename T, int N = 5>
void Transpose(const TransposeParams& params,
const RuntimeShape& unextended_input_shape, const T* input_data,
const RuntimeShape& unextended_output_shape, T* output_data) {
// Transpose kernel only does rearranging values not numeric evaluations on
// each cell. It's safe to implement per size of scalar type and this trick
// keeps the total code size in a reasonable range.
switch (sizeof(T)) {
case 1:
TransposeImpl<int8_t, N>(params, unextended_input_shape,
reinterpret_cast<const int8_t*>(input_data),
unextended_output_shape,
reinterpret_cast<int8_t*>(output_data));
break;
case 2:
TransposeImpl<int16_t, N>(params, unextended_input_shape,
reinterpret_cast<const int16_t*>(input_data),
unextended_output_shape,
reinterpret_cast<int16_t*>(output_data));
break;
case 4:
TransposeImpl<int32_t, N>(params, unextended_input_shape,
reinterpret_cast<const int32_t*>(input_data),
unextended_output_shape,
reinterpret_cast<int32_t*>(output_data));
break;
case 8:
TransposeImpl<int64_t, N>(params, unextended_input_shape,
reinterpret_cast<const int64_t*>(input_data),
unextended_output_shape,
reinterpret_cast<int64_t*>(output_data));
break;
}
}
} // namespace reference_ops
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_H_

View File

@@ -400,13 +400,22 @@ inline size_t ReducedOutputOffset(const int num_dims, const int* dims,
return offset;
}
// Since tensors with '0' in their shape are valid in TF, these offset functions
// allow that as long as the corresponding index is also 0. It is upto the
// calling ops to ensure that they perform verification checks on tensor shapes
// if they don't support a particular behavior.
inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) {
TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4);
const int* dims_data = reinterpret_cast<const int*>(shape.DimsDataUpTo5D());
TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]);
TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]);
TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]);
TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]);
TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) ||
(i0 >= 0 && i0 < dims_data[0]));
TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) ||
(i1 >= 0 && i1 < dims_data[1]));
TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) ||
(i2 >= 0 && i2 < dims_data[2]));
TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) ||
(i3 >= 0 && i3 < dims_data[3]));
return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
}
@@ -414,21 +423,34 @@ inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3,
int i4) {
TFLITE_DCHECK_EQ(shape.DimensionsCount(), 5);
const int* dims_data = reinterpret_cast<const int*>(shape.DimsDataUpTo5D());
TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]);
TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]);
TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]);
TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]);
TFLITE_DCHECK(i4 >= 0 && i4 < dims_data[4]);
TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) ||
(i0 >= 0 && i0 < dims_data[0]));
TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) ||
(i1 >= 0 && i1 < dims_data[1]));
TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) ||
(i2 >= 0 && i2 < dims_data[2]));
TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) ||
(i3 >= 0 && i3 < dims_data[3]));
TFLITE_DCHECK((dims_data[4] == 0 && i4 == 0) ||
(i4 >= 0 && i4 < dims_data[4]));
return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) *
dims_data[4] +
i4;
}
inline int Offset(const RuntimeShape& shape, int* index) {
return Offset(shape, index[0], index[1], index[2], index[3]);
}
inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) {
TFLITE_DCHECK(i0 >= 0 && i0 < dims.sizes[0]);
TFLITE_DCHECK(i1 >= 0 && i1 < dims.sizes[1]);
TFLITE_DCHECK(i2 >= 0 && i2 < dims.sizes[2]);
TFLITE_DCHECK(i3 >= 0 && i3 < dims.sizes[3]);
TFLITE_DCHECK((i0 == 0 && dims.sizes[0] == 0) ||
(i0 >= 0 && i0 < dims.sizes[0]));
TFLITE_DCHECK((i1 == 0 && dims.sizes[1] == 0) ||
(i1 >= 0 && i1 < dims.sizes[1]));
TFLITE_DCHECK((i2 == 0 && dims.sizes[2] == 0) ||
(i2 >= 0 && i2 < dims.sizes[2]));
TFLITE_DCHECK((i3 == 0 && dims.sizes[3] == 0) ||
(i3 >= 0 && i3 < dims.sizes[3]));
return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] +
i3 * dims.strides[3];
}
@@ -437,10 +459,6 @@ inline int Offset(const Dims<4>& dims, int* index) {
return Offset(dims, index[0], index[1], index[2], index[3]);
}
inline int Offset(const RuntimeShape& shape, int* index) {
return Offset(shape, index[0], index[1], index[2], index[3]);
}
// Get array size, DCHECKing that the dim index is in range.
//
// Note that this will be phased out with Dims<4>, since RuntimeShape::Dims()
@@ -602,6 +620,58 @@ inline int MatchingFlatSize(const Dims<N>& dims, const Dims<N>& check_dims_0,
return MatchingFlatSize(dims, check_dims_1, check_dims_2, check_dims_3);
}
// Flat size calculation, checking if their extended shapes match.
inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0) {
const int shape_dims = shape.DimensionsCount();
const int check_shape_0_dims = check_shape_0.DimensionsCount();
const int min_dims = std::min(shape_dims, check_shape_0_dims);
for (int i = 0; i < min_dims; ++i) {
TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i),
check_shape_0.Dims(check_shape_0_dims - 1 - i));
}
for (int i = min_dims; i < shape_dims; ++i) {
TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i), 1);
}
for (int i = min_dims; i < check_shape_0_dims; ++i) {
TFLITE_DCHECK_EQ(check_shape_0.Dims(check_shape_0_dims - 1 - i), 1);
}
return shape.FlatSize();
}
inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1) {
const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1),
flat_size);
return flat_size;
}
inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1,
const RuntimeShape& check_shape_2) {
const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
TFLITE_DCHECK_EQ(
MatchingExtendedShapeFlatSize(shape, check_shape_1, check_shape_2),
flat_size);
return flat_size;
}
inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
const RuntimeShape& check_shape_0,
const RuntimeShape& check_shape_1,
const RuntimeShape& check_shape_2,
const RuntimeShape& check_shape_3) {
const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1,
check_shape_2, check_shape_3),
flat_size);
return flat_size;
}
// Data is required to be contiguous, and so many operators can use either the
// full array flat size or the flat size with one dimension skipped (commonly
// the depth).
@@ -885,6 +955,8 @@ struct Conv3DParams {
float float_activation_max;
};
typedef Conv3DParams Conv3DTransposeParams;
struct DepthToSpaceParams {
int32_t block_size;
};
@@ -1019,9 +1091,9 @@ struct PackParams {
struct PadParams {
int8_t left_padding_count;
int32_t left_padding[4];
int32_t left_padding[5];
int8_t right_padding_count;
int32_t right_padding[4];
int32_t right_padding[5];
ResizingCategory resizing_category;
};
@@ -1196,6 +1268,23 @@ inline void GetActivationParams(const P& params, int64_t* min, int64_t* max) {
*min = params.int64_activation_min;
*max = params.int64_activation_max;
}
// Type trait to check of given type has size smaller than 4 bytes.
template <typename T>
struct is_small_integer
: public std::integral_constant<bool,
std::is_same<T, int8_t>::value ||
std::is_same<T, uint8_t>::value ||
std::is_same<T, int16_t>::value ||
std::is_same<T, uint16_t>::value> {};
// Type trait to check of given type is int32 or int64.
template <typename T>
struct is_int32_or_int64
: public std::integral_constant<bool, std::is_same<T, int32_t>::value ||
std::is_same<T, int64_t>::value> {
};
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_

View File

@@ -119,6 +119,7 @@ TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node,
TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
int index) {
TfLiteTensor* tensor = GetMutableInput(context, node, index);
if (tensor == nullptr) return nullptr;
return tensor->is_variable ? tensor : nullptr;
}
@@ -197,7 +198,7 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int* per_channel_shift) {
int32_t* per_channel_multiplier, int32_t* per_channel_shift) {
const auto* affine_quantization =
reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
return PopulateConvolutionQuantizationParams(
@@ -212,7 +213,8 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels) {
int32_t* per_channel_multiplier, int32_t* per_channel_shift,
int num_channels) {
TF_LITE_ENSURE_EQ(context, input->quantization.type,
kTfLiteAffineQuantization);
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
@@ -333,30 +335,49 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
}
namespace {
void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
int32_t qmin, int32_t qmax,
TfLiteTensor* output,
int32_t* act_min, int32_t* act_max) {
inline TfLiteStatus Quantize(TfLiteContext* context, float scale,
int32_t zero_point, float f, int32_t& q) {
const float tmp = TfLiteRound(f / scale);
const bool no_integer_overflow_from_quantization =
(tmp >= static_cast<float>(std::numeric_limits<int32_t>::min()) &&
tmp <= static_cast<float>(std::numeric_limits<int32_t>::max()));
TF_LITE_ENSURE(context, no_integer_overflow_from_quantization);
q = zero_point + static_cast<int32_t>(tmp);
return kTfLiteOk;
}
TfLiteStatus CalculateActivationRangeQuantizedImpl(
TfLiteContext* context, TfLiteFusedActivation activation, int32_t qmin,
int32_t qmax, TfLiteTensor* output, int32_t* act_min, int32_t* act_max) {
const auto scale = output->params.scale;
const auto zero_point = output->params.zero_point;
auto quantize = [scale, zero_point](float f) {
return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
};
int32_t tmp_q;
if (activation == kTfLiteActRelu) {
*act_min = std::max(qmin, quantize(0.0));
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 0.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
*act_max = qmax;
} else if (activation == kTfLiteActRelu6) {
*act_min = std::max(qmin, quantize(0.0));
*act_max = std::min(qmax, quantize(6.0));
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 0.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 6.0, tmp_q));
*act_max = std::min(qmax, tmp_q);
} else if (activation == kTfLiteActReluN1To1) {
*act_min = std::max(qmin, quantize(-1.0));
*act_max = std::min(qmax, quantize(1.0));
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, -1.0, tmp_q));
*act_min = std::max(qmin, tmp_q);
TF_LITE_ENSURE_OK(context,
Quantize(context, scale, zero_point, 1.0, tmp_q));
*act_max = std::min(qmax, tmp_q);
} else {
*act_min = qmin;
*act_max = qmax;
}
return kTfLiteOk;
}
} // namespace
@@ -380,9 +401,8 @@ TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
TF_LITE_ENSURE(context, false);
}
CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
act_max);
return kTfLiteOk;
return CalculateActivationRangeQuantizedImpl(context, activation, qmin, qmax,
output, act_min, act_max);
}
bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
@@ -412,18 +432,15 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,
TfLiteIntArray** output_shape) {
int dims1 = NumDimensions(input1);
int dims2 = NumDimensions(input2);
int out_dims = std::max(dims1, dims2);
if (NumElements(input1) == 0) {
*output_shape = TfLiteIntArrayCopy(input1->dims);
return kTfLiteOk;
}
const int dims1 = NumDimensions(input1);
const int dims2 = NumDimensions(input2);
const int out_dims = std::max(dims1, dims2);
std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
for (int i = 0; i < out_dims; ++i) {
int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
if (!(d1 == d2 || d1 == 1 || d2 == 1)) {
context->ReportError(context,
"Given shapes, %s and %s, are not broadcastable.",
@@ -431,7 +448,12 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
GetShapeDebugString(input2->dims).c_str());
return kTfLiteError;
}
shape->data[out_dims - i - 1] = std::max(d1, d2);
if (d1 == 0 || d2 == 0) {
shape->data[out_dims - i - 1] = 0;
} else {
shape->data[out_dims - i - 1] = std::max(d1, d2);
}
}
*output_shape = shape.release();
return kTfLiteOk;
@@ -442,17 +464,20 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
const TfLiteTensor* input2,
const TfLiteTensor* input3,
TfLiteIntArray** output_shape) {
int dims1 = NumDimensions(input1);
int dims2 = NumDimensions(input2);
int dims3 = NumDimensions(input3);
int out_dims = std::max(std::max(dims1, dims2), dims3);
const int dims1 = NumDimensions(input1);
const int dims2 = NumDimensions(input2);
const int dims3 = NumDimensions(input3);
const int out_dims = std::max(std::max(dims1, dims2), dims3);
std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
for (int i = 0; i < out_dims; ++i) {
int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
const int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
const int min_value = std::min(std::min(d1, d2), d3);
int max_value = std::max(std::max(d1, d2), d3);
// If one dimention is 0, others must be 0 or 1.
if (min_value == 0) max_value = 0;
if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) ||
!(d3 == 1 || d3 == max_value)) {
context->ReportError(
@@ -473,42 +498,42 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
int TfLiteTypeGetSize(TfLiteType type) {
switch (type) {
case kTfLiteUInt8:
TF_LITE_ASSERT_EQ(sizeof(uint8_t), 1);
static_assert(sizeof(uint8_t) == 1, "");
return 1;
case kTfLiteInt8:
TF_LITE_ASSERT_EQ(sizeof(int8_t), 1);
static_assert(sizeof(int8_t) == 1, "");
return 1;
case kTfLiteBool:
return sizeof(bool);
case kTfLiteInt16:
TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
static_assert(sizeof(int16_t) == 2, "");
return 2;
case kTfLiteFloat16:
TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
static_assert(sizeof(int16_t) == 2, "");
return 2;
case kTfLiteFloat32:
TF_LITE_ASSERT_EQ(sizeof(float), 4);
static_assert(sizeof(float) == 4, "");
return 4;
case kTfLiteInt32:
TF_LITE_ASSERT_EQ(sizeof(int32_t), 4);
static_assert(sizeof(int32_t) == 4, "");
return 4;
case kTfLiteUInt32:
TF_LITE_ASSERT_EQ(sizeof(uint32_t), 4);
static_assert(sizeof(uint32_t) == 4, "");
return 4;
case kTfLiteInt64:
TF_LITE_ASSERT_EQ(sizeof(int64_t), 8);
static_assert(sizeof(int64_t) == 8, "");
return 8;
case kTfLiteUInt64:
TF_LITE_ASSERT_EQ(sizeof(uint64_t), 8);
static_assert(sizeof(uint64_t) == 8, "");
return 8;
case kTfLiteFloat64:
TF_LITE_ASSERT_EQ(sizeof(double), 8);
static_assert(sizeof(double) == 8, "");
return 8;
case kTfLiteComplex64:
TF_LITE_ASSERT_EQ(sizeof(std::complex<float>), 8);
static_assert(sizeof(std::complex<float>) == 8, "");
return 8;
case kTfLiteComplex128:
TF_LITE_ASSERT_EQ(sizeof(std::complex<double>), 16);
static_assert(sizeof(std::complex<double>) == 16, "");
return 16;
default:
return 0;

View File

@@ -214,14 +214,15 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int* per_channel_shift);
int32_t* per_channel_multiplier, int32_t* per_channel_shift);
TfLiteStatus PopulateConvolutionQuantizationParams(
TfLiteContext* context, const TfLiteTensor* input,
const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
int32_t* output_activation_min, int32_t* output_activation_max,
int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels);
int32_t* per_channel_multiplier, int32_t* per_channel_shift,
int num_channels);
// Calculates the multiplication factor for a quantized convolution (or
// quantized depthwise convolution) involving the given tensors. Returns an

View File

@@ -15,69 +15,24 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
#define TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
// If we're on a platform without standard IO functions, fall back to a
// non-portable function.
#ifdef TF_LITE_MCU_DEBUG_LOG
#include "tensorflow/lite/micro/debug_log.h"
#define DEBUG_LOG(x) \
do { \
DebugLog(x); \
} while (0)
inline void InfiniteLoop() {
DEBUG_LOG("HALTED\n");
#if !defined(TF_LITE_MCU_DEBUG_LOG)
#include <cstdlib>
#define TFLITE_ABORT abort()
#else
inline void AbortImpl() {
DebugLog("HALTED\n");
while (1) {
}
}
#define TFLITE_ABORT AbortImpl();
#endif
#define TFLITE_ABORT InfiniteLoop();
#else // TF_LITE_MCU_DEBUG_LOG
#include <cstdio>
#include <cstdlib>
#define DEBUG_LOG(x) \
do { \
fprintf(stderr, "%s", (x)); \
} while (0)
// Report Error for unsupported type by op 'op_name' and returns kTfLiteError.
#define TF_LITE_UNSUPPORTED_TYPE(context, type, op_name) \
do { \
TF_LITE_KERNEL_LOG((context), "%s:%d Type %s is unsupported by op %s.", \
__FILE__, __LINE__, TfLiteTypeGetName(type), \
(op_name)); \
return kTfLiteError; \
} while (0)
#define TFLITE_ABORT abort()
#endif // TF_LITE_MCU_DEBUG_LOG
#if defined(NDEBUG) || defined(ARDUINO)
#if defined(NDEBUG)
#define TFLITE_ASSERT_FALSE (static_cast<void>(0))
#else
#define TFLITE_ASSERT_FALSE TFLITE_ABORT
#endif
#define TF_LITE_FATAL(msg) \
do { \
DEBUG_LOG(msg); \
DEBUG_LOG("\nFATAL\n"); \
TFLITE_ABORT; \
} while (0)
#define TF_LITE_ASSERT(x) \
do { \
if (!(x)) TF_LITE_FATAL(#x); \
} while (0)
#define TF_LITE_ASSERT_EQ(x, y) \
do { \
if ((x) != (y)) TF_LITE_FATAL(#x " didn't equal " #y); \
} while (0)
#endif // TENSORFLOW_LITE_KERNELS_OP_MACROS_H_

View File

@@ -20,7 +20,6 @@ limitations under the License.
namespace tflite {
// TODO(renjieliu): Migrate others to use ComputePaddingWithLeftover.
inline int ComputePadding(int stride, int dilation_rate, int in_size,
int filter_size, int out_size) {
int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
@@ -45,6 +44,11 @@ inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size,
inline int ComputeOutSize(TfLitePadding padding, int image_size,
int filter_size, int stride, int dilation_rate = 1) {
int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
// TODO(b/186448822): This uses 0 since the function has no other way to
// report error case
if (stride == 0) return 0;
switch (padding) {
case kTfLitePaddingSame:
return (image_size + stride - 1) / stride;

View File

@@ -32,14 +32,18 @@ AllOpsResolver::AllOpsResolver() {
AddConcatenation();
AddConv2D();
AddCos();
AddCumSum();
AddDepthToSpace();
AddDepthwiseConv2D();
AddDequantize();
AddDetectionPostprocess();
AddDiv();
AddElu();
AddEqual();
AddEthosU();
AddExpandDims();
AddFloor();
AddFloorDiv();
AddFloorMod();
AddFullyConnected();
AddGreater();
AddGreaterEqual();
@@ -70,6 +74,7 @@ AllOpsResolver::AllOpsResolver() {
AddRelu();
AddRelu6();
AddReshape();
AddResizeBilinear();
AddResizeNearestNeighbor();
AddRound();
AddRsqrt();
@@ -77,6 +82,7 @@ AllOpsResolver::AllOpsResolver() {
AddSin();
AddSoftmax();
AddSpaceToBatchNd();
AddSpaceToDepth();
AddSplit();
AddSplitV();
AddSqrt();
@@ -87,6 +93,7 @@ AllOpsResolver::AllOpsResolver() {
AddSvdf();
AddTanh();
AddTransposeConv();
AddTranspose();
AddUnpack();
}

View File

@@ -0,0 +1,64 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/flatbuffer_utils.h"
namespace tflite {
FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size)
: flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {}
int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadInt64(elem, byte_width_);
}
uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadUInt64(elem, byte_width_);
}
int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const {
return static_cast<int32_t>(ElementAsInt64(i));
}
bool FlexbufferWrapper::ElementAsBool(size_t i) const {
return static_cast<bool>(ElementAsUInt64(i));
}
double FlexbufferWrapper::ElementAsDouble(size_t i) const {
const uint8_t* elem = data_ + i * byte_width_;
return ::flexbuffers::ReadDouble(elem, byte_width_);
}
float FlexbufferWrapper::ElementAsFloat(size_t i) const {
return static_cast<float>(FlexbufferWrapper::ElementAsDouble(i));
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const SubGraph* subgraph) {
if (subgraph->operators() != nullptr) {
return subgraph->operators()->size();
} else {
return 0;
}
}
// TODO(b/192589496): Ops must always be there. Remove this function when fixed
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) {
const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
return NumSubgraphOperators(subgraph);
}
} // namespace tflite

View File

@@ -0,0 +1,56 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
#include "flatbuffers/flatbuffers.h"
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
// with the parameter names as map keys and the parameter values as the
// corresponding map values.
// Accessing the map values using the flexbuffers:Map class is inline heavy,
// which can cause the code size to bloat beyond what's reasonable for a micro
// application. Use this class instead, when possible.
// FlexbufferWrapper takes advantage of the following properties of
// flexbuffers::Map:
// 1. It can be viewed as a flexbuffers::Vector of the values.
// 2. The values in the vector are ordered alphabetically by their keys.
// 3. All integer and Boolean values are stored as 64-bit numbers.
// 4. All floating point values are stored as double precision numbers.
// The properties are mentioned in the flexbuffers docs, but we rely on
// a unit test to catch design changes.
class FlexbufferWrapper : public flexbuffers::Vector {
public:
// Construct with a serialized flexbuffer 'buffer' of 'size' bytes
explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
int64_t ElementAsInt64(size_t i) const;
uint64_t ElementAsUInt64(size_t i) const;
int32_t ElementAsInt32(size_t i) const;
bool ElementAsBool(size_t i) const;
double ElementAsDouble(size_t i) const;
float ElementAsFloat(size_t i) const;
};
// Return the number of operators in a subgraph tflite
uint32_t NumSubgraphOperators(const SubGraph* subgraph);
uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);
} // namespace tflite
#endif // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_

View File

@@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
@@ -25,141 +27,21 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
uint8_t six_uint8;
uint8_t zero_uint8;
};
} // namespace
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
template <typename T>
inline void ReluQuantized(const ReluOpData& data,
const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const T* input_data,
T* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<T>(clamped);
}
}
template <typename T>
inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
template <typename Q>
inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
const Q* input_data,
const RuntimeShape& output_shape, Q* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const Q val = input_data[i];
const Q clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
} else if (input->type == kTfLiteUInt8) {
CalculateReluOpData<uint8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -171,19 +53,12 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8: {
ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorData<uint8_t>(output));
return kTfLiteOk;
}
default: {
TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
TfLiteTypeGetName(input->type));
@@ -197,34 +72,14 @@ void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
} else if (input->type == kTfLiteUInt8) {
data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_uint8 = input->params.zero_point;
}
return kTfLiteOk;
}
TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
switch (input->type) {
case kTfLiteFloat32: {
@@ -236,19 +91,11 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
case kTfLiteInt8: {
Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
case kTfLiteUInt8: {
Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
Relu6Quantized(data.zero_int8, data.six_int8,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
}
default: {
@@ -259,13 +106,13 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
}
}
} // namespace activations
} // namespace
TfLiteRegistration Register_RELU() {
return {/*init=*/activations::ReluInit,
return {/*init=*/ReluInit,
/*free=*/nullptr,
/*prepare=*/activations::ReluPrepare,
/*invoke=*/activations::ReluEval,
/*prepare=*/ReluPrepare,
/*invoke=*/ReluEval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
@@ -273,16 +120,14 @@ TfLiteRegistration Register_RELU() {
}
TfLiteRegistration Register_RELU6() {
return {/*init=*/activations::Relu6Init,
return {/*init=*/Relu6Init,
/*free=*/nullptr,
/*prepare=*/activations::Relu6Prepare,
/*invoke=*/activations::Relu6Eval,
/*prepare=*/Relu6Prepare,
/*invoke=*/Relu6Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace micro
} // namespace ops
} // namespace tflite

View File

@@ -0,0 +1,63 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
extern const int kActivationsInputTensor;
extern const int kActivationsOutputTensor;
struct ReluOpData {
ReluParams params;
};
struct Relu6OpData {
int8_t six_int8;
int8_t zero_int8;
};
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data);
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data);
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data);
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data);
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_

View File

@@ -0,0 +1,148 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activations.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
const int kActivationsInputTensor = 0;
const int kActivationsOutputTensor = 0;
void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
const RuntimeShape& output_shape, const int8_t* input_data,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int32_t val = static_cast<int32_t>(input_data[i]);
int32_t clamped =
data.params.output_offset +
MultiplyByQuantizedMultiplier(val - data.params.input_offset,
data.params.output_multiplier,
data.params.output_shift);
clamped = std::max(data.params.quantized_activation_min, clamped);
clamped = std::min(data.params.quantized_activation_max, clamped);
output_data[i] = static_cast<int8_t>(clamped);
}
}
template <typename T>
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
ReluOpData* data) {
float act_min = 0.0;
float act_max = std::numeric_limits<float>::infinity();
double real_multiplier =
static_cast<double>(input->params.scale / output->params.scale);
const RuntimeShape input_shape = GetTensorShape(input);
const RuntimeShape output_shape = GetTensorShape(output);
QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
&data->params.output_shift);
data->params.quantized_activation_min = std::max(
static_cast<int32_t>(std::numeric_limits<T>::min()),
output->params.zero_point +
static_cast<int32_t>(roundf(act_min / output->params.scale)));
data->params.quantized_activation_max =
act_max == std::numeric_limits<float>::infinity()
? static_cast<int32_t>(std::numeric_limits<T>::max())
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
output->params.zero_point +
static_cast<int32_t>(
roundf(act_max / output->params.scale)));
data->params.input_offset = input->params.zero_point;
data->params.output_offset = output->params.zero_point;
}
void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float lower = 0.0f;
const float clamped = val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
const RuntimeShape& output_shape, float* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const float val = input_data[i];
const float upper = 6.0f;
const float lower = 0.0f;
const float clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
const int8_t* input_data, const RuntimeShape& output_shape,
int8_t* output_data) {
const int flat_size = MatchingFlatSize(input_shape, output_shape);
for (int i = 0; i < flat_size; ++i) {
const int8_t val = input_data[i];
const int8_t clamped = val > upper ? upper : val < lower ? lower : val;
output_data[i] = clamped;
}
}
TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
TfLiteTensor* output = GetOutput(context, node, kActivationsOutputTensor);
TF_LITE_ENSURE(context, output != nullptr);
if (input->type == kTfLiteInt8) {
CalculateReluOpData<int8_t>(input, output, data);
}
return kTfLiteOk;
}
TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
const TfLiteTensor* input = GetInput(context, node, kActivationsInputTensor);
TF_LITE_ENSURE(context, input != nullptr);
if (input->type == kTfLiteInt8) {
data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
input->params.zero_point);
data->zero_int8 = input->params.zero_point;
}
return kTfLiteOk;
}
} // namespace tflite

View File

@@ -66,12 +66,12 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
OpData* data) {
data->requires_broadcast = !HaveSameShapes(input1, input2);
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
// 8bit -> 8bit general quantized path, with general rescalings
data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = 20;
data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
const double twice_max_input_scale =
2 * static_cast<double>(
std::max(input1->params.scale, input2->params.scale));
@@ -133,24 +133,25 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
switch (output->type) {
case kTfLiteInt8: {
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
@@ -168,24 +169,32 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
}
} else {
break;
}
case kTfLiteInt16: {
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
tflite::micro::GetTensorData<int16_t>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<uint8_t>(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<uint8_t>(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
tflite::micro::GetTensorData<int16_t>(output),
false);
}
break;
}
default:
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
@@ -231,7 +240,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
if (output->type == kTfLiteFloat32) {
EvalAdd(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
} else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
input1, input2, output));
} else {

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include <cstdint>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
@@ -28,6 +29,22 @@ namespace {
constexpr int kInputTensor0 = 0;
constexpr int kOutputTensor = 0;
constexpr int kAddNIntegerShift = 20;
// only used with INT8 tensors
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t input_offset;
int32_t output_offset;
int32_t input_multiplier;
int32_t output_multiplier;
int input_shift;
int output_shift;
int left_shift;
int scratch_index;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
int num_inputs = NumInputs(node);
TF_LITE_ENSURE(context, num_inputs >= 2);
@@ -47,19 +64,61 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, i, &input));
TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input));
TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type);
// Check that all INT8 input tensors have the same zero-point and scale.
if (input_tensor_first->type == kTfLiteInt8) {
TF_LITE_ENSURE(context, input_tensor_first->params.zero_point ==
input->params.zero_point);
TF_LITE_ENSURE(context,
input_tensor_first->params.scale == input->params.scale);
}
}
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in the node's user_data
if (output->type == kTfLiteFloat32) {
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in the node's user_data
int scratch_index;
size_t scratch_size = sizeof(float*) * num_inputs;
TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
context, scratch_size, &scratch_index));
node->user_data =
reinterpret_cast<decltype(node->user_data)>(scratch_index);
} else if (output->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = static_cast<OpData*>(node->user_data);
// Allocate scratch buffer space for pointer to each tensor's data
// and store the scratch buffer index in OpData
size_t scratch_size = sizeof(int8_t*) * num_inputs;
TF_LITE_ENSURE_OK(
context, context->RequestScratchBufferInArena(context, scratch_size,
&data->scratch_index));
// 8bit -> 8bit general quantized path, with general rescalings
data->input_offset = -input_tensor_first->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = kAddNIntegerShift;
const double twice_max_input_scale =
2 * static_cast<double>(input_tensor_first->params.scale);
const double real_input_multiplier =
static_cast<double>(input_tensor_first->params.scale) /
twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input_multiplier, &data->input_multiplier, &data->input_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, kTfLiteActNone, output, &data->output_activation_min,
&data->output_activation_max));
} else {
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
@@ -72,12 +131,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
inline const T** CopyInputsToScratchBuffer(TfLiteContext* context,
TfLiteNode* node,
const int scratch_index) {
int num_inputs = NumInputs(node);
int scratch_index =
static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
void* scratch_buffer = context->GetScratchBuffer(context, scratch_index);
const T** all_inputs = static_cast<decltype(all_inputs)>(scratch_buffer);
for (int i = 0; i < num_inputs; i++) {
@@ -86,17 +143,56 @@ void EvalAddN(TfLiteContext* context, TfLiteNode* node,
all_inputs[i] = tflite::micro::GetTensorData<T>(next_input);
}
return all_inputs;
}
template <typename T>
void EvalAddN(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
int num_inputs = NumInputs(node);
int scratch_index =
static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
const T** all_inputs =
CopyInputsToScratchBuffer<T>(context, node, scratch_index);
reference_ops::AddN<T>(tflite::micro::GetTensorShape(output), num_inputs,
all_inputs, tflite::micro::GetTensorData<T>(output));
}
template <typename T>
void EvalAddNQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteEvalTensor* output) {
int num_inputs = NumInputs(node);
OpData* data = static_cast<OpData*>(node->user_data);
const T** all_inputs =
CopyInputsToScratchBuffer<T>(context, node, data->scratch_index);
ArithmeticParams params;
params.left_shift = data->left_shift;
params.input1_offset = data->input_offset;
params.input1_multiplier = data->input_multiplier;
params.input1_shift = data->input_shift;
params.output_offset = data->output_offset;
params.output_multiplier = data->output_multiplier;
params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min, data->output_activation_max,
&params);
reference_ops::AddN(params, tflite::micro::GetTensorShape(output), num_inputs,
all_inputs, tflite::micro::GetTensorData<T>(output));
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalAddN<float>(context, node, output);
} else if (output->type == kTfLiteInt8) {
EvalAddNQuantized<int8_t>(context, node, output);
} else {
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}

View File

@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
@@ -22,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/flatbuffer_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
/*
@@ -56,6 +55,11 @@ namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// Indices into the init flexbuffer's vector.
// The parameter's name is in the comment that follows.
// Elements in the vectors are ordered alphabetically by parameter name.
constexpr int kCyclesMaxIndex = 0; // 'cycles_max'
// TODO(b/149795762): Add this to TfLiteStatus enum.
constexpr TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
@@ -76,8 +80,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
if (buffer != nullptr && length > 0) {
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
op_data->cycles_max = m["cycles_max"].AsInt32();
tflite::FlexbufferWrapper wrapper(buffer_t, length);
op_data->cycles_max = wrapper.ElementAsInt32(kCyclesMaxIndex);
} else {
op_data->cycles_max = 0;
}
@@ -118,6 +122,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
// https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
output->dims->data[1] == 25 ||
(cb_prepare_count == 5 && output->dims->data[2] == 2 &&
output->dims->data[3] == 96)) {
op_data->cycles_max = 1;

View File

@@ -147,8 +147,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
TF_LITE_ENSURE(context,
input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
input_type == kTfLiteInt64);
input_type == kTfLiteInt8 || input_type == kTfLiteInt16 ||
input_type == kTfLiteInt32 || input_type == kTfLiteInt64);
// Output type must match input type
TF_LITE_ENSURE_EQ(context, output_type, input_type);
@@ -182,6 +182,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
switch (output_type) { // Already know in/outtypes are same.
case kTfLiteFloat32:
case kTfLiteInt16:
case kTfLiteInt32:
case kTfLiteInt64: {
data->params.axis = CalculatePositiveAxis(params->axis, output);
@@ -247,6 +248,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt64:
EvalUnquantized<int64_t>(context, node);
break;
case kTfLiteInt16:
EvalUnquantized<int16_t>(context, node);
break;
default:
TF_LITE_KERNEL_LOG(

View File

@@ -53,8 +53,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE_MSG(context, input->type == filter->type,
"Hybrid models are not supported on TFLite Micro.");
TF_LITE_ENSURE_MSG(
context,
input->type == filter->type ||
(input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
"Hybrid models are not supported on TFLite Micro.");
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32: {
@@ -70,6 +73,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorShape(nullptr), nullptr);
break;
}
case kTfLiteInt16: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,
data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int16_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<int8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<std::int64_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
break;
}
case kTfLiteInt8: {
reference_integer_ops::ConvPerChannel(
ConvParamsQuantized(params, data), data.per_channel_output_multiplier,

View File

@@ -72,6 +72,21 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);
// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_CONV_2D();
#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 inputs and outputs.
TfLiteRegistration Register_CONV_2D_INT8REF();
#else
inline TfLiteRegistration Register_CONV_2D_INT8REF() {
return Register_CONV_2D();
}
#endif
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_

View File

@@ -111,8 +111,7 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}
@@ -155,7 +154,7 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
context, num_channels * sizeof(int32_t)));
// All per-channel quantized tensors need valid zero point and scale arrays.
if (input->type == kTfLiteInt8) {
if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, filter->quantization.type,
kTfLiteAffineQuantization);

View File

@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -59,36 +59,45 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
TfLiteRegistration registration,
uint8_t* output_data, float tolerance = 1e-5);
TfLiteStatus TestConvFloat(const int* input_dims_data, const float* input_data,
const int* filter_dims_data,
const float* filter_data, const int* bias_dims_data,
const float* bias_data, const int* output_dims_data,
TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data,
int* filter_dims_data, const float* filter_data,
int* bias_dims_data, const float* bias_data,
int* output_dims_data,
const float* expected_output_data,
TfLiteConvParams* conv_params,
TfLiteRegistration registration, float* output_data);
TfLiteStatus TestConvQuantizedPerLayer(
const int* input_dims_data, const float* input_data,
uint8_t* input_quantized, float input_scale, const int* filter_dims_data,
const float* filter_data, uint8_t* filter_quantized, float filter_scale,
const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized,
const int* output_dims_data, const float* expected_output_data,
uint8_t* expected_output_quantized, float output_scale,
TfLiteConvParams* conv_params, TfLiteRegistration registration,
uint8_t* output_data);
int* input_dims_data, const float* input_data, uint8_t* input_quantized,
float input_scale, int* filter_dims_data, const float* filter_data,
uint8_t* filter_quantized, float filter_scale, int* bias_dims_data,
const float* bias_data, int32_t* bias_quantized, int* output_dims_data,
const float* expected_output_data, uint8_t* expected_output_quantized,
float output_scale, TfLiteConvParams* conv_params,
TfLiteRegistration registration, uint8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
const int* input_dims_data, const float* input_data,
int8_t* input_quantized, float input_scale, int input_zero_point,
const int* filter_dims_data, const float* filter_data,
int8_t* filter_data_quantized, const int* bias_dims_data,
const float* bias_data, int32_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, const int* output_dims_data,
int* input_dims_data, const float* input_data, int8_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
float* bias_scales, int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int8_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int8_t* output_data);
TfLiteStatus TestConvQuantizedPerChannel(
int* input_dims_data, const float* input_data, int16_t* input_quantized,
float input_scale, int input_zero_point, int* filter_dims_data,
const float* filter_data, int8_t* filter_data_quantized,
int* bias_dims_data, const float* bias_data,
std::int64_t* bias_data_quantized, float* bias_scales,
int* bias_zero_points, int* output_dims_data,
const float* expected_output_data, int16_t* expected_output_data_quantized,
float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
TfLiteRegistration registration, int16_t* output_data);
} // namespace testing
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
#endif // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_

View File

@@ -0,0 +1,173 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/cumsum.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kAxisTensor = 1;
constexpr int kOutputTensor = 0;
constexpr int kCumSumIntegerShift = 20;
// only used with INT8 tensors
struct OpData {
int32_t output_activation_min;
int32_t output_activation_max;
int32_t input_offset;
int32_t output_offset;
int32_t input_multiplier;
int32_t output_multiplier;
int input_shift;
int output_shift;
int left_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, kInputTensor);
const TfLiteTensor* axis = GetInput(context, node, kAxisTensor);
TF_LITE_ENSURE(context,
input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, axis->type, kTfLiteInt32);
TF_LITE_ENSURE_EQ(context, NumElements(axis), 1);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_EQ(context, input->type, output->type);
TF_LITE_ENSURE(context, HaveSameShapes(input, output));
if (output->type == kTfLiteInt8) {
node->user_data =
context->AllocatePersistentBuffer(context, sizeof(OpData));
OpData* data = static_cast<OpData*>(node->user_data);
// 8bit -> 8bit general quantized path, with general rescalings
data->input_offset = -input->params.zero_point;
data->output_offset = output->params.zero_point;
data->left_shift = kCumSumIntegerShift;
const double twice_max_input_scale =
2 * static_cast<double>(input->params.scale);
const double real_input_multiplier =
static_cast<double>(input->params.scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << data->left_shift) * static_cast<double>(output->params.scale));
QuantizeMultiplierSmallerThanOneExp(
real_input_multiplier, &data->input_multiplier, &data->input_shift);
QuantizeMultiplierSmallerThanOneExp(
real_output_multiplier, &data->output_multiplier, &data->output_shift);
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, kTfLiteActNone, output, &data->output_activation_min,
&data->output_activation_max));
}
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
const TfLiteEvalTensor* axis_tensor =
tflite::micro::GetEvalInput(context, node, kAxisTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
auto* cs_params = static_cast<TfLiteCumsumParams*>(node->builtin_data);
auto input_shape = tflite::micro::GetTensorShape(input);
int32_t axis = *tflite::micro::GetTensorData<int32_t>(axis_tensor);
if (axis < 0) axis += input_shape.DimensionsCount();
if (axis < 0 || axis >= input_shape.DimensionsCount()) {
TF_LITE_KERNEL_LOG(context, "CUMSUM Invalid axis: %d", axis);
return kTfLiteError;
}
switch (input->type) {
case kTfLiteFloat32: {
reference_ops::CumSum(tflite::micro::GetTensorData<float>(input),
input_shape, axis, cs_params->exclusive,
cs_params->reverse,
tflite::micro::GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteInt8: {
auto* data = static_cast<OpData*>(node->user_data);
ArithmeticParams params;
params.left_shift = data->left_shift;
params.input1_offset = data->input_offset;
params.input1_multiplier = data->input_multiplier;
params.input1_shift = data->input_shift;
params.output_offset = data->output_offset;
params.output_multiplier = data->output_multiplier;
params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &params);
reference_ops::CumSum(params, tflite::micro::GetTensorData<int8_t>(input),
input_shape, axis, cs_params->exclusive,
cs_params->reverse,
tflite::micro::GetTensorData<int8_t>(output));
return kTfLiteOk;
} break;
default: {
TF_LITE_KERNEL_LOG(context,
"CUMSUM only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
}
return kTfLiteError;
}
} // namespace
TfLiteRegistration Register_CUMSUM() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -0,0 +1,143 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
#include <stdint.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor = 0;
constexpr int kOutputTensor = 0;
// input/output tensor shape rank associations
constexpr int kBatchRank = 0;
constexpr int kHeightRank = 1;
constexpr int kWidthRank = 2;
constexpr int kDepthRank = 3;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
auto data_type = output->type;
TF_LITE_ENSURE(context,
data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
const int block_size = params->block_size;
TF_LITE_ENSURE(context, block_size > 0);
const int input_height = input->dims->data[kHeightRank];
const int input_width = input->dims->data[kWidthRank];
const int input_channels = input->dims->data[kDepthRank];
int output_height = input_height * block_size;
int output_width = input_width * block_size;
int output_channels = input_channels / block_size / block_size;
TF_LITE_ENSURE_EQ(context, input_height, output_height / block_size);
TF_LITE_ENSURE_EQ(context, input_width, output_width / block_size);
TF_LITE_ENSURE_EQ(context, input_channels,
output_channels * block_size * block_size);
// We must update the output tensor dimensions.
// The dims storage is expected to be the same area in memory
// for both TfLiteTensor and TfLiteEvalTensor. This is important
// because TfLiteTensor in the MicroInterpreter is a temporary
// allocation. For the KernelRunner interpreter, TfLiteEvalTensor
// is a temporary allocation. We must therefore relocate the dims
// from the FlatBuffer to the persistant storage arena.
TfLiteEvalTensor* output_eval =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
context, output, output_eval));
output->dims->data[kBatchRank] = input->dims->data[kBatchRank];
output->dims->data[kHeightRank] = output_height;
output->dims->data[kWidthRank] = output_width;
output->dims->data[kDepthRank] = output_channels;
return kTfLiteOk;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
auto* params =
reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
const TfLiteEvalTensor* input =
tflite::micro::GetEvalInput(context, node, kInputTensor);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
tflite::DepthToSpaceParams op_params;
op_params.block_size = static_cast<int32_t>(params->block_size);
switch (input->type) { // Already know in/out types are same.
case kTfLiteFloat32:
reference_ops::DepthToSpace(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
case kTfLiteInt8:
reference_ops::DepthToSpace(op_params,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(
context, "DEPTH_TO_SPACE only supports FLOAT32 and INT8, got %s.",
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DEPTH_TO_SPACE() {
return {/*init=*/nullptr,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -20,7 +20,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"

View File

@@ -18,7 +18,6 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
@@ -113,8 +112,7 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
context, input, filter, bias, output, params.activation,
&data->output_multiplier, &data->output_shift,
&data->output_activation_min, &data->output_activation_max,
data->per_channel_output_multiplier,
reinterpret_cast<int*>(data->per_channel_output_shift),
data->per_channel_output_multiplier, data->per_channel_output_shift,
output_channels));
}

View File

@@ -15,7 +15,6 @@ limitations under the License.
#include <numeric>
#define FLATBUFFERS_LOCALE_INDEPENDENT 0
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
@@ -117,12 +116,11 @@ struct OpData {
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
OpData* op_data = nullptr;
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
op_data = reinterpret_cast<OpData*>(
context->AllocatePersistentBuffer(context, sizeof(OpData)));

View File

@@ -1,206 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/div.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace {
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
struct OpData {
// Parameters used in the quantized paths where the output is 8bit
int32_t input1_zero_point;
int32_t input2_zero_point;
int32_t output_zero_point;
int32_t output_activation_min;
int32_t output_activation_max;
// Parameters used in all quantized paths
int32_t output_multiplier;
int output_shift;
};
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
TfLiteDivParams* params, OpData* data) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &data->output_activation_min,
&data->output_activation_max));
const double real_multiplier = static_cast<double>(
input1->params.scale / (input2->params.scale * output->params.scale));
QuantizeMultiplier(real_multiplier, &data->output_multiplier,
&data->output_shift);
data->input1_zero_point = input1->params.zero_point;
data->input2_zero_point = input2->params.zero_point;
data->output_zero_point = output->params.zero_point;
}
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
auto* data = static_cast<OpData*>(node->user_data);
return CalculateOpData(context, node, params, data);
}
void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
const OpData* data, const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
#define TF_LITE_DIV(type, opname, data_type) \
data_type output_activation_min, output_activation_max; \
CalculateActivationRange(params->activation, &output_activation_min, \
&output_activation_max); \
SetActivationParams(output_activation_min, output_activation_max, \
&op_params); \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<data_type>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<data_type>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<data_type>(output))
bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (requires_broadcast) {
TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
} else {
TF_LITE_DIV(reference_ops, Div, float);
}
#undef TF_LITE_DIV
}
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
TfLiteDivParams* params, const OpData* data,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
tflite::ArithmeticParams op_params = {};
#define TF_LITE_DIV(type, opname, dtype) \
type::opname(op_params, tflite::micro::GetTensorShape(input1), \
tflite::micro::GetTensorData<dtype>(input1), \
tflite::micro::GetTensorShape(input2), \
tflite::micro::GetTensorData<dtype>(input2), \
tflite::micro::GetTensorShape(output), \
tflite::micro::GetTensorData<dtype>(output))
if (input1->type == kTfLiteInt8 && input2->type == kTfLiteInt8 &&
output->type == kTfLiteInt8) {
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
op_params.input1_offset = -data->input1_zero_point;
op_params.input2_offset = -data->input2_zero_point;
op_params.output_offset = data->output_zero_point;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
bool requires_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (requires_broadcast) {
TF_LITE_DIV(reference_ops, BroadcastDivSlow, int8_t);
} else {
TF_LITE_DIV(reference_ops, Div, int8_t);
}
#undef TF_LITE_DIV
} else {
TF_LITE_KERNEL_LOG(
context, "Unsupported combination of input and output types in DIV.");
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->builtin_data != nullptr);
auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
TFLITE_DCHECK(node->user_data != nullptr);
auto* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
if (output->type == kTfLiteFloat32) {
EvalDiv(context, node, params, data, input1, input2, output);
} else if (output->type == kTfLiteInt8) {
TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, data,
input1, input2, output));
} else {
TF_LITE_KERNEL_LOG(context,
"DIV only supports FLOAT32, quantized INT8 "
"now, got type %s (%d).",
TfLiteTypeGetName(output->type), output->type);
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_DIV() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
namespace tflite {
namespace {
@@ -45,7 +46,10 @@ using TransformFunc = float (*)(float);
template <typename T>
void PopulateLookupTable(const TfLiteTensor* input, const TfLiteTensor* output,
const TransformFunc transform, OpData* data) {
if (sizeof(T) != 1) TF_LITE_FATAL("Lookup table valid only for 8bit");
if (sizeof(T) != 1) {
MicroPrintf("Lookup table valid only for 8bit");
TFLITE_ABORT;
}
const float inverse_scale = 1 / output->params.scale;
int32_t maxval = std::numeric_limits<T>::max();

View File

@@ -0,0 +1,130 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
return nullptr;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
template <typename T>
TfLiteStatus EvalFloorDiv(TfLiteContext* context,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
// Validate the denominator.
for (int i = 0; i < tflite::ElementCount(*input2->dims); ++i) {
if (std::equal_to<T>()(denominator_data[i], 0)) {
TF_LITE_KERNEL_LOG(context, "Division by 0");
return kTfLiteError;
}
}
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
if (requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
switch (input1->type) {
case kTfLiteFloat32: {
return EvalFloorDiv<float>(context, input1, input2, output);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by FLOOR_DIV.",
TfLiteTypeGetName(input1->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_FLOOR_DIV() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -0,0 +1,128 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_utils.h"
// OLD-TODO(b/117523611): We should factor out a binary_op and put binary ops
// there.
namespace tflite {
namespace {
// Input/output tensor index.
constexpr int kInputTensor1 = 0;
constexpr int kInputTensor2 = 1;
constexpr int kOutputTensor = 0;
// OLD-TODO(b/117912880): Support quantization.
TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor1, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context,
GetInputSafe(context, node, kInputTensor2, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context,
GetOutputSafe(context, node, kOutputTensor, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
return kTfLiteOk;
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
return nullptr;
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return CalculateOpData(context, node);
}
template <typename T>
TfLiteStatus EvalFloorMod(TfLiteContext* context, bool requires_broadcast,
const TfLiteEvalTensor* input1,
const TfLiteEvalTensor* input2,
TfLiteEvalTensor* output) {
const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
if (requires_broadcast) {
reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
} else {
reference_ops::BinaryFunction<T, T, T>(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<T>(input1),
tflite::micro::GetTensorShape(input2), denominator_data,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
}
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kOutputTensor);
bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
switch (input1->type) {
case kTfLiteFloat32: {
return EvalFloorMod<float>(context, requires_broadcast, input1, input2,
output);
}
default: {
TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by FLOOR_MOD.",
TfLiteTypeGetName(input1->type));
return kTfLiteError;
}
}
}
} // namespace
TfLiteRegistration Register_FLOOR_MOD() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,
/*version=*/0};
}
} // namespace tflite

View File

@@ -109,19 +109,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
break;
}
case kTfLiteUInt8: {
tflite::reference_ops::FullyConnected(
FullyConnectedParamsQuantized(data),
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
tflite::micro::GetTensorShape(filter),
tflite::micro::GetTensorData<uint8_t>(filter),
tflite::micro::GetTensorShape(bias),
tflite::micro::GetTensorData<int32_t>(bias),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
}
default: {
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
TfLiteTypeGetName(input->type), input->type);

Some files were not shown because too many files have changed in this diff Show More