removed esp-nn

This commit is contained in:
CaCO3
2022-09-23 21:17:44 +02:00
parent 8a170a7e16
commit b06b42f0e9
45 changed files with 0 additions and 5600 deletions

View File

@@ -1,57 +0,0 @@
.config
*.o
*.i
*.s
*.orig
*.pyc
# gtags
GTAGS
GRTAGS
GPATH
# emacs
.dir-locals.el
# emacs temp file suffixes
*~
.#*
\#*#
# eclipse setting
.settings
# MacOS directory files
.DS_Store
# Example project files
examples/**/sdkconfig
examples/**/sdkconfig.old
examples/**/build
# Test app files
test_app/build
test_app/sdkconfig
test_app/sdkconfig.old
# Doc build artifacts
docs/_build/
docs/doxygen-warning-log.txt
docs/sphinx-warning-log.txt
docs/sphinx-warning-log-sanitized.txt
docs/xml/
docs/xml_in/
docs/man/
docs/doxygen_sqlite3.db
TEST_LOGS
# gcov coverage reports
*.gcda
*.gcno
coverage.info
coverage_report/
# VS Code Settings
.vscode/

View File

@@ -1,55 +0,0 @@
# The pipeline consists of a single stage.
stages:
- build
# Variables shared by the jobs in this pipeline.
variables:
BATCH_BUILD: "1"
V: "0"
MAKEFLAGS: "-j8 --no-keep-going"
# Points at the esp-idf checkout; the build job below clones a directory named
# esp-idf (presumably from $CI_PROJECT_DIR - confirm the job's working directory).
IDF_PATH: "$CI_PROJECT_DIR/esp-idf"
LOG_PATH: "$CI_PROJECT_DIR"
# Reusable command list (YAML anchor): sets the git identity used by the job.
.set_git_config: &set_git_config
# Set git config
- git config user.email "test@espressif.com"
- git config user.name "Espressif"
# Reusable command list (YAML anchor): installs the SSH key held in $GITLAB_KEY.
.add_ssh_key: &add_ssh_key
# Add gitlab ssh key
- mkdir -p ~/.ssh
- chmod 700 ~/.ssh
# The key is stored base64-encoded: write it to a file, then decode it into id_rsa.
# NOTE(review): $GITLAB_KEY is expanded unquoted, so the shell word-splits it;
# --ignore-garbage on the next line appears to tolerate that - confirm.
- echo -n $GITLAB_KEY > ~/.ssh/id_rsa_base64
- base64 --decode --ignore-garbage ~/.ssh/id_rsa_base64 > ~/.ssh/id_rsa
- chmod 600 ~/.ssh/id_rsa
# NOTE(review): host key checking is switched off for gitlab.espressif.cn.
- echo -e "Host gitlab.espressif.cn\n\tStrictHostKeyChecking no\n" >> ~/.ssh/config
# Commands run before the job script: SSH key first, then git identity.
before_script:
# Add gitlab ssh key
- *add_ssh_key
# Set git config
- *set_git_config
# Reusable command lists (YAML anchors): select a chip target and build for it.
.build_esp32s3: &build_esp32s3
- idf.py set-target esp32s3 build
.build_esp32: &build_esp32
- idf.py set-target esp32 build
# Build job: fetches ESP-IDF release/v4.4, sets up its environment and builds
# test_app for esp32s3 and then for esp32.
build_demo:
stage: build
image: $CI_DOCKER_REGISTRY/esp32-ci-env:esp-nn
tags:
- build
script:
# Clone IDF (recursive, single branch, using a local reference repo when available)
- git clone --recursive --single-branch -b release/v4.4 --reference-if-able /local_references/gitlab/ https://gitlab-ci-token:${BOT_TOKEN}@gitlab.espressif.cn:6688/espressif/esp-idf.git
- cd esp-idf
- ./install.sh
# export.sh is sourced so its environment changes apply to the following commands
- . ./export.sh
- cd ..
# Build the test application
- cd test_app
# Build esp32s3
- *build_esp32s3
# Build esp32
- *build_esp32
# Return to the previous directory
- cd -

View File

@@ -1,50 +0,0 @@
# ESP-IDF component definition for the ESP-NN kernels.
#
# The portable C kernels are always compiled. When the project is configured
# for ESP32-S3 the chip-specific assembly/C kernels are added as well, and the
# component is built with an extra set of compiler flags.

idf_build_get_property(idf_target IDF_TARGET)

# Sources compiled for every target: ANSI C reference kernels and the
# generic "_opt" variants.
set(nn_sources
    "src/activation_functions/esp_nn_relu_ansi.c"
    "src/basic_math/esp_nn_add_ansi.c"
    "src/basic_math/esp_nn_mul_ansi.c"
    "src/convolution/esp_nn_conv_ansi.c"
    "src/convolution/esp_nn_conv_opt.c"
    "src/convolution/esp_nn_depthwise_conv_ansi.c"
    "src/convolution/esp_nn_depthwise_conv_opt.c"
    "src/fully_connected/esp_nn_fully_connected_ansi.c"
    "src/softmax/esp_nn_softmax_ansi.c"
    "src/softmax/esp_nn_softmax_opt.c"
    "src/pooling/esp_nn_avg_pool_ansi.c"
    "src/pooling/esp_nn_max_pool_ansi.c")

# Compiler options used when the target is not ESP32-S3.
set(nn_compile_opts -Wno-unused-function)

if(CONFIG_IDF_TARGET_ESP32S3)
    # ESP32-S3 only: additional assembly (.S) and C kernels.
    list(APPEND nn_sources
        "src/common/esp_nn_common_functions_esp32s3.S"
        "src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S"
        "src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S"
        "src/activation_functions/esp_nn_relu_s8_esp32s3.S"
        "src/basic_math/esp_nn_add_s8_esp32s3.S"
        "src/basic_math/esp_nn_mul_s8_esp32s3.S"
        "src/convolution/esp_nn_conv_esp32s3.c"
        "src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c"
        "src/convolution/esp_nn_conv_s16_mult8_esp32s3.S"
        "src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S"
        "src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S"
        "src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S"
        "src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S"
        "src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S"
        "src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S"
        "src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S"
        "src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S"
        "src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S"
        "src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S"
        "src/pooling/esp_nn_max_pool_s8_esp32s3.S"
        "src/pooling/esp_nn_avg_pool_s8_esp32s3.S")

    # ESP32-S3 builds replace the default option list with this one.
    set(nn_compile_opts -mlongcalls -fno-unroll-loops -O2 -Wno-unused-function)
endif()

idf_component_register(SRCS "${nn_sources}"
                       INCLUDE_DIRS "include" "src/common")

# Options are PRIVATE: they apply to this component's own sources only.
target_compile_options(${COMPONENT_LIB} PRIVATE ${nn_compile_opts})

View File

@@ -1,29 +0,0 @@
# Configuration menu for the ESP-NN component.
menu "ESP-NN"
# Choice between the ANSI C reference kernels and the optimised kernels;
# the optimised kernels are the default.
choice NN_OPTIMIZATIONS
bool "Optimization for nn functions"
default NN_OPTIMIZED
help
Use ANSI-C versions for verification and debug purpose.
Optimisations are automatically picked up for a chipset.
For ESP32-S3, assembly optimisations are selected.
For other platforms(viz., ESP32, ESP32-C3), generic optimisations are used.
config NN_ANSI_C
bool "ANSI C"
help
ANSI C versions for verification and debug purposes.
config NN_OPTIMIZED
bool "Optimized versions"
help
Optimisations are automatically picked up for a chipset.
For ESP32-S3, assembly optimisations are selected.
For other platforms(viz., ESP32, ESP32-C3), generic optimisations are used.
endchoice
# Integer symbol derived from the choice: 0 when NN_ANSI_C is selected,
# 1 when NN_OPTIMIZED is selected. It has no prompt, so it is not set directly.
# NOTE(review): shares its name with the choice above - confirm this is intended.
config NN_OPTIMIZATIONS
int
default 0 if NN_ANSI_C
default 1 if NN_OPTIMIZED
endmenu

View File

@@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -1,55 +0,0 @@
# ESP-NN
The library contains optimised NN (Neural Network) functions for various Espressif chipsets.
* Supported platforms:
* TensorFlow Lite Micro (TFLite Micro). Repo can be found [here](https://github.com/espressif/tflite-micro-esp-examples)
* Supported ESP chipsets include:
* ESP32-S3 (Assembly versions optimised to benefit from vector instructions of ESP32-S3)
* ESP32 (Generic optimisations)
* ESP32-C3 (Generic optimisations)
## Performance
### Kernelwise performance for s8 versions:
* Kernelwise performance on ESP32-S3 chip
* Numbers are ticks taken for kernel to execute
* Chip config: 240MHz, SPI: QPI 80MHz, Data cache: 64KB
| Function | ANSI C | ESP32-S3 Opt | Opt Ratio | Data info | Memory |
| ----------------| --------|---------|---------|-------------|-----------|
| elementwise_add | 320397 | 87119 | 3.68 | size = 1615 | External |
| elementwise_mul | 125958 | 44239 | 2.85 | size = 1615 | External |
| convolution | 4663012 | 428675 | 10.88 | input(10,10), filter(64x1x1x64) | External |
| convolution | 301014 | 32433 | 9.28 | input(8,8), filter(16x1x1x16) | External |
| convolution | 2115418 | 1020923 | 2.07 | input(10,10), filter(64x3x3x3) | External |
| depthwise conv | 1190062 | 203278 | 5.85 | input (18, 18), pad(0,0), stride(1,1) filter: 1x3x3x16 | External |
| depthwise conv | 837072 | 182335 | 4.59 | input (12, 12), pad(1,1), stride(1,1) filter: 8x5x5x4 | External |
| max pool | 485714 | 76747 | 6.33 | input(16,16), filter (1x3x3x16) | Internal |
| avg pool | 541462 | 160580 | 3.37 | input(16,16), filter (1x3x3x16) | Internal |
| fully connected | 15853 | 9547 | 1.66 | len: 265, ch = 3 | Internal |
| prelu (relu6) | 19472 | 2734 | 7.12 | size = 1615 | Internal |
## Configuration
* To configure, please use `idf.py menuconfig` and under `ESP-NN` select `NN_OPTIMIZATIONS`
* There are two options presented:
* Optimized versions
* ANSI C
* Default selection is for `Optimized versions`. For ESP32-S3, assembly versions are automatically selected, whereas for other chipsets (viz., ESP32, ESP32-C3), generic optimisations are selected.
* For debugging purposes, you may want to select `ANSI C` reference versions.
## Contributing
If you encounter an issue with ESP-NN, or wish to submit a feature request, please use the Issues section on GitHub.
For general questions related to this library, please use the esp32.com forum.
## Copyrights and License
All original source code in this repository is Copyright (C) 2020-2021 Espressif Systems. This source code is licensed under the Apache License 2.0 as described in the file LICENSE.

View File

@@ -1,46 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/**
 * @file Top-level ESP-NN include.
 *
 * Selects which kernel header is pulled in, based on the build configuration:
 *  - CONFIG_NN_OPTIMIZED and CONFIG_IDF_TARGET_ESP32S3: esp_nn_esp32s3.h
 *  - CONFIG_NN_OPTIMIZED on any other target:           esp_nn_generic_opt.h
 *  - CONFIG_NN_OPTIMIZED not defined:                   esp_nn_ansi_c.h
 *
 * esp_nn_ansi_headers.h (the reference prototypes) is included in every case.
 */
#if defined(CONFIG_NN_OPTIMIZED)
// Map the IDF target onto an ARCH_* macro; only done when optimisations are enabled.
#ifdef CONFIG_IDF_TARGET_ESP32S3
#define ARCH_ESP32_S3 1
#endif
// ARCH_ESP32 is defined here but not tested anywhere in this header.
#ifdef CONFIG_IDF_TARGET_ESP32
#define ARCH_ESP32 1
#endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* reference kernels included by default */
#include "esp_nn_ansi_headers.h"
#if defined(CONFIG_NN_OPTIMIZED)
#if defined(ARCH_ESP32_S3)
#include "esp_nn_esp32s3.h"
#else // for other platforms use generic optimisations
#include "esp_nn_generic_opt.h"
#endif // #if defined(ARCH_ESP32_S3)
#else
/* optimisations disabled: use the ANSI C versions */
#include "esp_nn_ansi_c.h"
#endif
#ifdef __cplusplus
}
#endif

View File

@@ -1,47 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
 * @file Header definitions to include for ANSI C versions.
 * Each generic esp_nn_* name below is a preprocessor macro that expands to
 * the corresponding *_ansi function, so code written against the generic
 * names picks up the ANSI C versions.
 */
#pragma once
#include "esp_nn_defs.h"
#include "esp_nn_ansi_headers.h"
/* basic math */
#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_ansi
#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_ansi
/* convolution and its scratch-buffer helpers */
#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_ansi
#define esp_nn_conv_s8 esp_nn_conv_s8_ansi
#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_ansi
#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_ansi
#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_ansi
#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_ansi
/* activation, pooling and fully connected */
#define esp_nn_relu6_s8 esp_nn_relu6_s8_ansi
#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_ansi
#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_ansi
#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_ansi
/* softmax and its scratch-buffer helpers */
#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_ansi
#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_ansi
#define esp_nn_softmax_s8 esp_nn_softmax_s8_ansi

View File

@@ -1,309 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/**
* @file Header definitions to include for esp_nn reference functions
*/
#include "esp_nn_defs.h"
/************************** Basic math functions ****************************/
/**
 * @brief Elementwise addition, reference (ANSI C) version
 *
 * @note inputs type: int8_t, output: int8_t
 *       input offsets: although int32_t, they are contained in 8 bits [-128, 127]
 *
 *       shift values are expected to be <= 0
 *
 *       `size` is presumably the number of elements to process - TODO confirm
 *       against the implementation.
 */
void esp_nn_add_elementwise_s8_ansi(const int8_t *input1_data,
const int8_t *input2_data,
const int32_t input1_offset,
const int32_t input2_offset,
const int32_t input1_mult,
const int32_t input2_mult,
const int32_t input1_shift,
const int32_t input2_shift,
const int32_t left_shift,
int8_t *output,
const int32_t out_offset,
const int32_t out_mult,
const int32_t out_shift,
const int32_t activation_min,
const int32_t activation_max,
const int32_t size);
/**
 * @brief Elementwise multiplication, reference (ANSI C) version
 *
 * @note inputs type: int8_t, output: int8_t
 *       input offsets: although int32_t, they are contained in 8 bits [-128, 127]
 *
 *       output shift is expected to be <= 0
 *
 *       `size` is presumably the number of elements to process - TODO confirm
 *       against the implementation.
 */
void esp_nn_mul_elementwise_s8_ansi(const int8_t *input1_data,
const int8_t *input2_data,
const int32_t input1_offset,
const int32_t input2_offset,
int8_t *output,
const int32_t out_offset,
const int32_t out_mult,
const int32_t out_shift,
const int32_t activation_min,
const int32_t activation_max,
const int32_t size);
/************************** Convolution functions *****************************/
/**
 * @brief Depthwise convolution, per channel, reference (ANSI C) version
 *
 * @note inputs type: int8_t, output: int8_t
 *       The version used in tflite is per channel and this version follows
 *       the same approach: it has a per out_channel shift and multiplier
 *       for requantization (passed through `quant_data`).
 *
 *       optimization notes: Though input_offset is int32 type,
 *       offset values are contained in 8 bits [-128, 127]
 */
void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims,
const int8_t *input_data,
const data_dims_t *filter_dims,
const int8_t *filter_data,
const int32_t *bias,
const data_dims_t *output_dims,
int8_t *out_data,
const dw_conv_params_t *conv_params,
const quant_data_t *quant_data);
/**
 * @brief 2D convolution, channelwise, reference (ANSI C) version
 *
 * @note operation: result += (input + offset) * filter
 *
 *       inputs type: int8_t, output: int8_t
 *       input offsets: although int32_t, they are contained in 8 bits [-128, 127]
 *
 *       Dimensions, convolution parameters and quantization data are passed
 *       through the structures declared in esp_nn_defs.h.
 */
void esp_nn_conv_s8_ansi(const data_dims_t *input_dims,
const int8_t *input_data,
const data_dims_t *filter_dims,
const int8_t *filter_data,
const int32_t *bias,
const data_dims_t *output_dims,
int8_t *out_data,
const conv_params_t *conv_params,
const quant_data_t *quant_data);
/**
 * @brief Scratch-buffer helpers for the reference convolution functions
 *
 * @note The get_*_scratch_size functions take the same dims/params structures
 *       as the matching convolution call and return an int; presumably this is
 *       the scratch size in bytes - TODO confirm against the implementation.
 *       The set_*_scratch_buf functions take the buffer to use.
 */
int esp_nn_get_conv_scratch_size_ansi(const data_dims_t *input_dims,
const data_dims_t *filter_dims,
const data_dims_t *output_dims,
const conv_params_t *conv_params);
void esp_nn_set_conv_scratch_buf_ansi(const void *buf);
int esp_nn_get_depthwise_conv_scratch_size_ansi(const data_dims_t *input_dims,
const data_dims_t *filter_dims,
const data_dims_t *output_dims,
const dw_conv_params_t *conv_params);
void esp_nn_set_depthwise_conv_scratch_buf_ansi(const void *buf);
/************************** Activation functions *****************************/
/**
 * @brief relu6, reference (ANSI C) version
 *
 * @note `data` is an in/out buffer of int8_t (it is not const-qualified);
 *       `size` is presumably its element count - TODO confirm.
 */
void esp_nn_relu6_s8_ansi(int8_t *data, uint16_t size);
/************************** Pooling functions *****************************/
/**
 * @brief max_pool, reference (ANSI C) version
 *
 * @note inputs type: int8_t, output: int8_t
 *
 *       NOTE(review): the earlier note about "input offsets" did not match
 *       this prototype, which takes no offset parameter; it appears to have
 *       been copied from another kernel's documentation.
 */
void esp_nn_max_pool_s8_ansi(const int8_t *input,
const uint16_t input_wd,
const uint16_t input_ht,
int8_t *output,
const uint16_t output_wd,
const uint16_t output_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const uint16_t filter_wd,
const uint16_t filter_ht,
const uint16_t pad_wd,
const uint16_t pad_ht,
const int32_t activation_min,
const int32_t activation_max,
const uint16_t channels);
/**
 * @brief avg_pool, reference (ANSI C) version
 *
 * @note inputs type: int8_t, output: int8_t
 *
 *       NOTE(review): the earlier note about "input offsets" did not match
 *       this prototype, which takes no offset parameter; it appears to have
 *       been copied from another kernel's documentation.
 */
void esp_nn_avg_pool_s8_ansi(const int8_t *input,
const uint16_t input_wd,
const uint16_t input_ht,
int8_t *output,
const uint16_t output_wd,
const uint16_t output_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const uint16_t filter_wd,
const uint16_t filter_ht,
const uint16_t pad_wd,
const uint16_t pad_ht,
const int32_t activation_min,
const int32_t activation_max,
const uint16_t channels);
/************************** Fully connected functions ***********************/
/**
 * @brief Fully connected layer, reference (ANSI C) version
 *
 * @note inputs type: int8_t, output: int8_t
 *       input offsets: although int32_t, they are contained in 8 bits [-128, 127]
 *
 *       Unlike the convolution functions, a single out_shift/out_mult pair
 *       is passed here rather than per-channel arrays.
 */
void esp_nn_fully_connected_s8_ansi(const int8_t *input_data,
const int32_t input_offset,
const uint16_t row_len,
const int8_t *filter_data,
const int32_t filter_offset,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_channels,
const int32_t out_offset,
const int32_t out_shift,
const int32_t out_mult,
const int32_t activation_min,
const int32_t activation_max);
/**
 * @brief Get scratch buffer size needed by softmax function
 *
 * @param width
 * @param height
 * @return size in bytes
 *
 * @note buffer must be 4 byte aligned
 */
int32_t esp_nn_get_softmax_scratch_size_ansi(const int32_t width, const int32_t height);
/*
 * Same signature with the _opt suffix, for use when the optimised version is
 * needed. NOTE(review): the original comment called this an "ANSI C function
 * to be hooked up" - confirm what distinguishes it from the _ansi variant.
 */
int32_t esp_nn_get_softmax_scratch_size_opt(const int32_t width, const int32_t height);
/**
 * @brief Set scratch buffer to be used by softmax function
 *
 * @param buffer scratch buffer, aligned to 4 bytes;
 *               pass NULL to unset a previously set buffer
 */
void esp_nn_set_softmax_scratch_buf_ansi(void *buffer);
/**
 * @brief Softmax, reference (ANSI C) version
 *
 * @note inputs type: int8_t, output: int8_t
 *       Note the argument order: `height` comes before `width`.
 */
void esp_nn_softmax_s8_ansi(const int8_t *input_data,
const int32_t height,
const int32_t width,
const int32_t mult,
const int32_t shift,
const int32_t diff_min,
int8_t *output_data);
//////////////////////////// Generic optimisations /////////////////////////////
/************************** Convolution functions *****************************/
/**
 * @brief 2D convolution, channelwise, generic optimized version
 *
 * @note operation: result += (input + offset) * filter
 *
 *       inputs type: int8_t, output: int8_t
 *       input offsets: although int32_t, they are contained in 8 bits [-128, 127]
 *
 *       Takes the same parameter list as esp_nn_conv_s8_ansi.
 */
void esp_nn_conv_s8_opt(const data_dims_t *input_dims,
const int8_t *input_data,
const data_dims_t *filter_dims,
const int8_t *filter_data,
const int32_t *bias,
const data_dims_t *output_dims,
int8_t *out_data,
const conv_params_t *conv_params,
const quant_data_t *quant_data);
/**
 * @brief Depthwise convolution, per channel, generic optimized version
 *
 * @note inputs type: int8_t, output: int8_t
 *       The version used in tflite is per channel and this version follows
 *       the same approach: it has a per out_channel shift and multiplier
 *       for requantization (passed through `quant_data`).
 *
 *       optimization notes: Though input_offset is int32 type,
 *       offset values are contained in 8 bits [-128, 127]
 */
void esp_nn_depthwise_conv_s8_opt(const data_dims_t *input_dims,
const int8_t *input_data,
const data_dims_t *filter_dims,
const int8_t *filter_data,
const int32_t *bias,
const data_dims_t *output_dims,
int8_t *out_data,
const dw_conv_params_t *conv_params,
const quant_data_t *quant_data);
/**
 * @brief Scratch-buffer helpers for the generic optimized convolution functions
 *
 * @note The get_*_scratch_size functions take the same dims/params structures
 *       as the matching convolution call and return an int; presumably this is
 *       the scratch size in bytes - TODO confirm against the implementation.
 *       The set_*_scratch_buf functions take the buffer to use.
 */
int esp_nn_get_conv_scratch_size_opt(const data_dims_t *input_dims,
const data_dims_t *filter_dims,
const data_dims_t *output_dims,
const conv_params_t *conv_params);
void esp_nn_set_conv_scratch_buf_opt(const void *buf);
int esp_nn_get_depthwise_conv_scratch_size_opt(const data_dims_t *input_dims,
const data_dims_t *filter_dims,
const data_dims_t *output_dims,
const dw_conv_params_t *conv_params);
void esp_nn_set_depthwise_conv_scratch_buf_opt(const void *buf);
/*
 * Scratch-buffer setter with the _opt suffix, for use when the optimised
 * version is needed. NOTE(review): the original comment called this an
 * "ANSI C function to be hooked up" - confirm how it differs from the
 * _ansi variant.
 */
void esp_nn_set_softmax_scratch_buf_opt(void *buffer);
/**
 * @brief Optimised version of softmax function
 *
 * @note the function uses an extra buffer (4 * width bytes),
 *       hence the scratch buffer must be set before calling this.
 *       Note the argument order: `height` comes before `width`.
 */
void esp_nn_softmax_s8_opt(const int8_t *input_data,
const int32_t height,
const int32_t width,
const int32_t mult,
const int32_t shift,
const int32_t diff_min,
int8_t *output_data);

View File

@@ -1,83 +0,0 @@
// Copyright 2022 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
/**
 * @brief Dimensions of a data buffer.
 *
 * One descriptor type shared by input, output and filter dimensions.
 */
struct data_dims {
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t extra;      /* spare field: batch or any other parameter */
};
typedef struct data_dims data_dims_t;
/**
 * @brief Pair of 2D values: width first, then height.
 */
struct data_2d {
    int32_t width;
    int32_t height;
};
typedef struct data_2d data_2d_t;
/**
 * @brief Activation limits: minimum first, then maximum.
 */
struct act_params {
    int32_t min;
    int32_t max;
};
typedef struct act_params act_params_t;
/**
 * @brief Per-channel quantization data.
 *
 * @note `shift` and `mult` each hold one element per output channel.
 */
struct quant_data {
    int32_t *shift;
    int32_t *mult;
};
typedef struct quant_data quant_data_t;
/**
 * @brief Parameters for 2D convolution: input/output offsets, stride,
 *        padding, dilation and activation limits.
 */
struct conv_params {
    int32_t in_offset;
    int32_t out_offset;
    data_2d_t stride;
    data_2d_t padding;
    data_2d_t dilation;
    act_params_t activation;
};
typedef struct conv_params conv_params_t;
/**
 * @brief Parameters for depthwise 2D convolution: the 2D convolution
 *        fields plus a channel multiplier.
 */
struct dw_conv_params {
    int32_t in_offset;
    int32_t out_offset;
    int32_t ch_mult;    /* channel multiplier: in_ch * ch_mult = out_ch */
    data_2d_t stride;
    data_2d_t padding;
    data_2d_t dilation;
    act_params_t activation;
};
typedef struct dw_conv_params dw_conv_params_t;

View File

@@ -1,231 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @file Header definitions to include for esp_nn optimized functions for
* the ESP32-S3 platform
*/
#pragma once
#include "esp_nn_defs.h"
#include "esp_nn_ansi_headers.h"
/************************** Basic math functions *****************************/
/**
* @brief elementwise addition (ESP32-S3 variant)
*
* @note inputs type: int8_t, output: int8_t
* input offsets: although int32_t, they are contained in 8 bits [-128, 127]
*
* shift values are expected to be <= 0
*
* The parameter list is identical to esp_nn_add_elementwise_s8_ansi.
* NOTE(review): the implementation is not part of this header; it is
* presumably a drop-in replacement for the _ansi version -- confirm.
*
* @param input1_data,input2_data  operands, `size` elements each
* @param input1_offset,input2_offset  offsets for the two operands
* @param input1_mult,input2_mult  fixed point multipliers for the operands
* @param input1_shift,input2_shift  shifts for the operands
* @param left_shift  common left shift
* @param output  result buffer
* @param out_offset,out_mult,out_shift  output requantization parameters
* @param activation_min,activation_max  output activation limits
* @param size  number of elements
*/
void esp_nn_add_elementwise_s8_esp32s3(const int8_t *input1_data,
const int8_t *input2_data,
const int32_t input1_offset,
const int32_t input2_offset,
const int32_t input1_mult,
const int32_t input2_mult,
const int32_t input1_shift,
const int32_t input2_shift,
const int32_t left_shift,
int8_t *output,
const int32_t out_offset,
const int32_t out_mult,
const int32_t out_shift,
const int32_t activation_min,
const int32_t activation_max,
const int32_t size);
/**
* @brief elementwise multiplication (ESP32-S3 variant)
*
* @note inputs type: int8_t, output: int8_t
* input offsets: although int32_t, they are contained in 8 bits [-128, 127]
*
* output shift is expected to be <= 0
*
* The parameter list is identical to esp_nn_mul_elementwise_s8_ansi.
* NOTE(review): the implementation is not part of this header; it is
* presumably a drop-in replacement for the _ansi version -- confirm.
*
* @param input1_data,input2_data  operands, `size` elements each
* @param input1_offset,input2_offset  offsets for the two operands
* @param output  result buffer
* @param out_offset,out_mult,out_shift  output requantization parameters
* @param activation_min,activation_max  output activation limits
* @param size  number of elements
*/
void esp_nn_mul_elementwise_s8_esp32s3(const int8_t *input1_data,
const int8_t *input2_data,
const int32_t input1_offset,
const int32_t input2_offset,
int8_t *output,
const int32_t out_offset,
const int32_t out_mult,
const int32_t out_shift,
const int32_t activation_min,
const int32_t activation_max,
const int32_t size);
/************************** Convolution functions *****************************/
/**
* @brief depthwise convolution per channel (ESP32-S3 variant)
*
* @note inputs type: int8_t, output: int8_t
* Version used in tflite is per channel.
* This version follows the same footprints.
* Meaning, it has per out_channel shift and multiplier for
* requantization
*
* optimization notes: Though input_offset is int32 type,
* offset values are contained in 8 bits [-128, 127]
*
* @param input_dims,input_data    input shape and data
* @param filter_dims,filter_data  filter shape and data
* @param bias                     bias values (int32_t)
* @param output_dims,output_data  output shape and destination buffer
* @param conv_params              depthwise parameters (offsets, ch_mult, stride, padding, dilation, activation)
* @param quant_data               per out_channel shift and multiplier
*/
void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims,
const int8_t *input_data,
const data_dims_t *filter_dims,
const int8_t *filter_data,
const int32_t *bias,
const data_dims_t *output_dims,
int8_t *output_data,
const dw_conv_params_t *conv_params,
const quant_data_t *quant_data);
/**
* @brief 2d - convolution channelwise (ESP32-S3 variant)
*
* @note operation: result += (input + offset) * filter
*
* inputs type: int8_t, output: int8_t
* input offsets: although int32_t, they are contained in 8 bits [-128, 127]
*
* The parameter list is identical to esp_nn_conv_s8_ansi.
*
* @param input_dims,input_data    input shape and data
* @param filter_dims,filter_data  filter shape and data
* @param bias                     bias values (int32_t)
* @param output_dims,output_data  output shape and destination buffer
* @param conv_params              convolution parameters (offsets, stride, padding, dilation, activation)
* @param quant_data               per out_channel shift and multiplier
*/
void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims,
const int8_t *input_data,
const data_dims_t *filter_dims,
const int8_t *filter_data,
const int32_t *bias,
const data_dims_t *output_dims,
int8_t *output_data,
const conv_params_t *conv_params,
const quant_data_t *quant_data);
/*
* Scratch buffer helpers for the ESP32-S3 convolution variants.
*
* The `get_*_scratch_size` functions take the same shape/parameter
* descriptors as the matching convolution call and return an int size; the
* `set_*_scratch_buf` functions take a buffer pointer.
* NOTE(review): presumably the caller queries the size, allocates that much
* memory and registers it before running the convolution; the unit of the
* returned size is not visible from this header -- confirm.
*/
int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
const data_dims_t *filter_dims,
const data_dims_t *output_dims,
const conv_params_t *conv_params);
void esp_nn_set_conv_scratch_buf_esp32s3(const void *buf);
int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
const data_dims_t *filter_dims,
const data_dims_t *output_dims,
const dw_conv_params_t *conv_params);
void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(const void *buf);
/************************** Pooling functions *****************************/
/**
* @brief max_pool (ESP32-S3 variant)
*
* @note inputs type: int8_t, output: int8_t
* This function takes no offset parameters; all sizes are uint16_t.
*
* @param input                     input data
* @param input_wd,input_ht         input width / height
* @param output                    output buffer
* @param output_wd,output_ht       output width / height
* @param stride_wd,stride_ht       stride along width / height
* @param filter_wd,filter_ht       pooling window width / height
* @param pad_wd,pad_ht             padding along width / height
* @param activation_min,activation_max  output activation limits
* @param channels                  number of channels
*/
void esp_nn_max_pool_s8_esp32s3(const int8_t *input,
const uint16_t input_wd,
const uint16_t input_ht,
int8_t *output,
const uint16_t output_wd,
const uint16_t output_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const uint16_t filter_wd,
const uint16_t filter_ht,
const uint16_t pad_wd,
const uint16_t pad_ht,
const int32_t activation_min,
const int32_t activation_max,
const uint16_t channels);
/**
* @brief avg_pool (ESP32-S3 variant)
*
* @note inputs type: int8_t, output: int8_t
* This function takes no offset parameters; all sizes are uint16_t.
*
* @param input                     input data
* @param input_wd,input_ht         input width / height
* @param output                    output buffer
* @param output_wd,output_ht       output width / height
* @param stride_wd,stride_ht       stride along width / height
* @param filter_wd,filter_ht       pooling window width / height
* @param pad_wd,pad_ht             padding along width / height
* @param activation_min,activation_max  output activation limits
* @param channels                  number of channels
*/
void esp_nn_avg_pool_s8_esp32s3(const int8_t *input,
const uint16_t input_wd,
const uint16_t input_ht,
int8_t *output,
const uint16_t output_wd,
const uint16_t output_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const uint16_t filter_wd,
const uint16_t filter_ht,
const uint16_t pad_wd,
const uint16_t pad_ht,
const int32_t activation_min,
const int32_t activation_max,
const uint16_t channels);
/************************** Fully connected functions *****************************/
/**
* @brief fully connected (ESP32-S3 variant)
*
* @note inputs type: int8_t, output: int8_t
* input offsets: although int32_t, they are contained in 8 bits [-128, 127]
*
* Current version works only on aligned input.
* row_len and channels should both be multiple of 8.
*
* Mind the argument order: here `out_shift` comes BEFORE `out_mult`,
* whereas the elementwise add/mul functions take `out_mult` first.
*
* @param input_data,input_offset    input vector and its offset
* @param row_len                    length of one filter row
* @param filter_data,filter_offset  filter data and its offset
* @param bias                       bias values (int32_t)
* @param out_data                   output buffer
* @param out_channels               number of output channels
* @param out_offset,out_shift,out_mult  output requantization parameters
* @param activation_min,activation_max  output activation limits
*/
void esp_nn_fully_connected_s8_esp32s3(const int8_t *input_data,
const int32_t input_offset,
const uint16_t row_len,
const int8_t *filter_data,
const int32_t filter_offset,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_channels,
const int32_t out_offset,
const int32_t out_shift,
const int32_t out_mult,
const int32_t activation_min,
const int32_t activation_max);
/**
* @brief relu6 (ESP32-S3 variant)
*
* @note inout: int8_t -- `data` is both the input and the output buffer.
*
* @param data  buffer of `size` int8_t values, modified in place
* @param size  number of elements
*/
void esp_nn_relu6_s8_esp32s3(int8_t *data, uint16_t size);
/********************** function defines ***************************/
/*
* Map the generic esp_nn_* names onto the ESP32-S3 implementations declared
* in this header. The three softmax entries map to the *_opt symbols; no
* *_esp32s3 softmax function is declared in this header.
*/
#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_esp32s3
#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_esp32s3
#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_esp32s3
#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_esp32s3
#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_esp32s3
#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_esp32s3
#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_esp32s3
#define esp_nn_conv_s8 esp_nn_conv_s8_esp32s3
#define esp_nn_relu6_s8 esp_nn_relu6_s8_esp32s3
#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_esp32s3
#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_esp32s3
#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_esp32s3
/* softmax: mapped to the *_opt implementations */
#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_opt
#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_opt
#define esp_nn_softmax_s8 esp_nn_softmax_s8_opt

View File

@@ -1,47 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @file Header definitions to include for esp_nn generic optimisations
* For functions that have no optimised implementation, the _ansi versions are picked.
*/
#pragma once
#include "esp_nn_defs.h"
#include "esp_nn_ansi_headers.h"
/*
* Map the generic esp_nn_* names onto concrete implementations:
* elementwise add/mul, relu6, pooling and fully connected use the _ansi
* versions; convolution, depthwise convolution (including their scratch
* helpers) and softmax use the _opt versions.
*/
#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_ansi
#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_ansi
#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_opt
#define esp_nn_conv_s8 esp_nn_conv_s8_opt
#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_opt
#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_opt
#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_opt
#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_opt
#define esp_nn_relu6_s8 esp_nn_relu6_s8_ansi
#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_ansi
#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_ansi
#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_ansi
#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_opt
#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_opt
#define esp_nn_softmax_s8 esp_nn_softmax_s8_opt

View File

@@ -1,30 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdlib.h>
#include <common_functions.h>
/**
 * Clamp every element of `data` to the range [0, 6], in place.
 *
 * @param data  buffer of `size` int8_t values; overwritten with the result
 * @param size  number of elements in `data`
 */
void esp_nn_relu6_s8_ansi(int8_t *data, uint16_t size)
{
    int8_t *cursor = data;
    const int8_t *const end = data + size;

    while (cursor != end) {
        /* widen first so the clamp works on a 32 bit value */
        const int32_t non_negative = max((int32_t) *cursor, 0);
        *cursor = min(non_negative, 6);
        cursor++;
    }
}

View File

@@ -1,97 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <common_functions.h>
/**
 * Quantized elementwise addition of two uint8_t buffers (reference C version).
 *
 * For each element: both operands get their offset added, are shifted left
 * by `left_shift`, rescaled with their own multiplier/shift, summed,
 * rescaled with the output multiplier/shift, offset by `out_offset` and
 * clamped to [activation_min, activation_max].
 *
 * The shift arguments are negated before being used as right-shift amounts.
 */
void esp_nn_add_elementwise_u8_ansi(const uint8_t *input1_data,
                                    const uint8_t *input2_data,
                                    const int32_t input1_offset,
                                    const int32_t input2_offset,
                                    const int32_t input1_mult,
                                    const int32_t input2_mult,
                                    const int32_t input1_shift,
                                    const int32_t input2_shift,
                                    const int32_t left_shift,
                                    uint8_t *output,
                                    const int32_t out_offset,
                                    const int32_t out_mult,
                                    const int32_t out_shift,
                                    const int32_t activation_min,
                                    const int32_t activation_max,
                                    const int32_t size)
{
    for (int idx = 0; idx < size; ++idx) {
        /* offset and pre-scale both operands */
        int32_t lhs = input1_data[idx] + input1_offset;
        int32_t rhs = input2_data[idx] + input2_offset;
        lhs <<= left_shift;
        rhs <<= left_shift;

        /* bring each operand to the common scale */
        lhs = esp_nn_div_by_power_of_two(
                  esp_nn_sat_round_doubling_high_mul(lhs, input1_mult), -input1_shift);
        rhs = esp_nn_div_by_power_of_two(
                  esp_nn_sat_round_doubling_high_mul(rhs, input2_mult), -input2_shift);

        /* add, then requantize to the output scale */
        int32_t sum = esp_nn_sat_round_doubling_high_mul(lhs + rhs, out_mult);
        sum = esp_nn_div_by_power_of_two(sum, -out_shift) + out_offset;

        /* clamp: upper bound first, then lower bound */
        sum = min(sum, activation_max);
        sum = max(activation_min, sum);
        output[idx] = (uint8_t) sum;
    }
}
/**
 * Quantized elementwise addition of two int8_t buffers (reference C version).
 *
 * For each element: both operands get their offset added, are shifted left
 * by `left_shift`, rescaled with their own multiplier/shift, summed,
 * rescaled with the output multiplier/shift, offset by `out_offset` and
 * clamped to [activation_min, activation_max].
 *
 * The shift arguments are negated before being used as right-shift amounts.
 */
void esp_nn_add_elementwise_s8_ansi(const int8_t *input1_data,
                                    const int8_t *input2_data,
                                    const int32_t input1_offset,
                                    const int32_t input2_offset,
                                    const int32_t input1_mult,
                                    const int32_t input2_mult,
                                    const int32_t input1_shift,
                                    const int32_t input2_shift,
                                    const int32_t left_shift,
                                    int8_t *output,
                                    const int32_t out_offset,
                                    const int32_t out_mult,
                                    const int32_t out_shift,
                                    const int32_t activation_min,
                                    const int32_t activation_max,
                                    const int32_t size)
{
    for (int idx = 0; idx < size; ++idx) {
        /* offset and pre-scale both operands */
        int32_t lhs = input1_data[idx] + input1_offset;
        int32_t rhs = input2_data[idx] + input2_offset;
        lhs <<= left_shift;
        rhs <<= left_shift;

        /* bring each operand to the common scale */
        lhs = esp_nn_div_by_power_of_two(
                  esp_nn_sat_round_doubling_high_mul(lhs, input1_mult), -input1_shift);
        rhs = esp_nn_div_by_power_of_two(
                  esp_nn_sat_round_doubling_high_mul(rhs, input2_mult), -input2_shift);

        /* add, then requantize to the output scale */
        int32_t sum = esp_nn_sat_round_doubling_high_mul(lhs + rhs, out_mult);
        sum = esp_nn_div_by_power_of_two(sum, -out_shift) + out_offset;

        /* clamp: upper bound first, then lower bound */
        sum = min(sum, activation_max);
        sum = max(activation_min, sum);
        output[idx] = (int8_t) sum;
    }
}

View File

@@ -1,42 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <common_functions.h>
/**
 * Quantized elementwise multiplication of two int8_t buffers (reference C
 * version).
 *
 * For each element: both operands get their offset added and are
 * multiplied; the product is requantized with (out_mult, out_shift), offset
 * by `out_offset` and clamped to [activation_min, activation_max].
 */
void esp_nn_mul_elementwise_s8_ansi(const int8_t *input1_data,
                                    const int8_t *input2_data,
                                    const int32_t input1_offset,
                                    const int32_t input2_offset,
                                    int8_t *output,
                                    const int32_t out_offset,
                                    const int32_t out_mult,
                                    const int32_t out_shift,
                                    const int32_t activation_min,
                                    const int32_t activation_max,
                                    const int32_t size)
{
    for (int idx = 0; idx < size; ++idx) {
        const int32_t lhs = input1_data[idx] + input1_offset;
        const int32_t rhs = input2_data[idx] + input2_offset;

        /* requantize the raw product and move it to the output zero point */
        int32_t product = esp_nn_multiply_by_quantized_mult(lhs * rhs, out_mult, out_shift);
        product += out_offset;

        /* clamp: upper bound first, then lower bound */
        product = min(product, activation_max);
        product = max(activation_min, product);
        output[idx] = (int8_t) product;
    }
}

View File

@@ -1,255 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
/**
* c99 standard still doesn't strictly inline functions
* We need to use attribute as well to do this.
*
* Functions declared with this macro are `static inline` and additionally
* carry the compiler's always_inline attribute.
*/
#define __NN_FORCE_INLINE__ __attribute((always_inline)) static inline
/*
 * min/max macros.
 *
 * Each argument is evaluated exactly once (statement expression + typeof),
 * so arguments with side effects are safe.
 *
 * The two macros are guarded independently: a translation unit that already
 * provides only one of them still gets the other, and an existing definition
 * is never redefined.
 */
#ifndef max
#define max(a, b) ({ \
    __typeof__ (a) _a = (a); \
    __typeof__ (b) _b = (b); \
    _a > _b ? _a : _b; \
})
#endif
#ifndef min
#define min(a, b) ({ \
    __typeof__ (a) _a = (a); \
    __typeof__ (b) _b = (b); \
    _a < _b ? _a : _b; \
})
#endif
/**
 * Count the leading zero bits of a 32 bit value.
 *
 * Three implementations are selected at compile time: the Xtensa `nsau`
 * instruction, the GCC builtin, or a portable binary search.
 *
 * @param in  value to inspect
 * @return number of leading zero bits; 32 when `in` is 0 on the builtin and
 *         portable paths
 */
__NN_FORCE_INLINE__ int32_t esp_nn_clz32(uint32_t in)
{
#if CONFIG_IDF_TARGET_ARCH_XTENSA
    __asm__ volatile("nsau %0, %0" : "+r" (in));
    return in;
#elif defined(__GNUC__)
    /*
     * __builtin_clz(0) is undefined. Return 32 explicitly so this path
     * agrees with the portable fallback below, which yields 32 for zero.
     */
    return in ? __builtin_clz(in) : 32;
#else
    /* binary search for the highest set bit, halving the window each step */
    int32_t count = 32;
    uint32_t x = in, y = in >> 16;
    if (y != 0) {
        count -= 16;
        x = y;
    }
    y = x >> 8;
    if (y != 0) {
        count -= 8;
        x = y;
    }
    y = x >> 4;
    if (y != 0) {
        count -= 4;
        x = y;
    }
    y = x >> 2;
    if (y != 0) {
        count -= 2;
        x = y;
    }
    y = x >> 1;
    if (y != 0) {
        return count - 2;
    }
    /* x is 0 or 1 here */
    return count - x;
#endif
}
/**
 * Saturate a signed 32 bit value to the int8_t range [INT8_MIN, INT8_MAX].
 * The result is still returned in a 32 bit variable.
 */
__NN_FORCE_INLINE__ int32_t esp_nn_saturate8(int32_t in)
{
#if CONFIG_IDF_TARGET_ARCH_XTENSA
    __asm__ volatile("clamps %0, %0, 7" : "+a"(in));
    return in;
#else
    if (in > INT8_MAX) {
        return INT8_MAX;
    }
    if (in < INT8_MIN) {
        return INT8_MIN;
    }
    return in;
#endif
}
/**
* Divide a 64 bit value by 2^31, truncating toward zero, and return the
* result narrowed to 32 bits.
*
* NOTE(review): despite the "sat" in the name there is no saturation in this
* function; the final cast simply narrows. The caller visible in this file
* handles its only overflow case separately.
*/
__NN_FORCE_INLINE__ int32_t esp_nn_pick_sat_high32_of64(int64_t val64)
{
// sign is 0 for non-negative input and -1 (all ones) for negative input
int32_t sign = (int32_t) (val64 >> 63);
// 0 for non-negative input, 2^31 - 1 for negative input: makes the
// arithmetic shift below round toward zero instead of toward -infinity
int32_t to_add = sign & ((1ul << 31) - 1);
return (int32_t) ((int64_t) (val64 + to_add) >> 31);
}
/**
* Saturating, rounding, doubling high multiplication:
* computes (in0 * in1) / 2^31 in 64 bit arithmetic, rounded to nearest.
*
* The only saturating case is in0 == in1 == INT32_MIN, for which INT32_MAX
* is returned.
*/
__NN_FORCE_INLINE__ int32_t esp_nn_sat_round_doubling_high_mul(int32_t in0, int32_t in1)
{
int32_t result;
int64_t in0_64 = (int64_t) in0;
// only INT32_MIN * INT32_MIN produces a result that does not fit
bool overflow = (in0 == in1) && (in0 == (int32_t) INT32_MIN);
/* Nudge value */
// +2^30 when the operand signs agree, 1 - 2^30 when they differ
int64_t nudge_val = 1 << 30;
if ((in0 < 0) ^ (in1 < 0)) {
nudge_val = 1 - nudge_val;
}
/* Multiply and add nudge */
int64_t mult = in0_64 * in1 + nudge_val;
/* Round and pickup 32 bits */
result = esp_nn_pick_sat_high32_of64(mult);
return overflow ? INT32_MAX : result;
}
/**
* fast version
* this will fail for values closer to INT32_MAX and INT32_MIN by `1 << (exponent - 1)`.
* We can afford to do this because we are at the very last stage of filter.
* Also it is pretty rare condition as our output is going to be 8 bit.
*
* Divides `val` by 2^exponent with rounding, by adding a bias and shifting.
*
* @note `exponent` must be >= 1: the bias is computed as
*       `1 << (exponent - 1)`. The caller in this file only calls this
*       function when the right shift is non-zero.
*/
__NN_FORCE_INLINE__ int32_t esp_nn_div_by_power_of_two_fast(int32_t val, int32_t exponent)
{
// half of the divisor, reduced by one for negative inputs
int32_t to_add = (1 << (exponent - 1)) - (val < 0);
return (int32_t) ((val + to_add) >> exponent);
}
/**
 * Divide `val` by 2^exponent, rounding to nearest.
 *
 * The quotient is the arithmetic right shift of `val`; it is incremented
 * when the discarded low bits exceed half of the divisor (the threshold is
 * one higher when the shifted quotient is negative).
 */
__NN_FORCE_INLINE__ int32_t esp_nn_div_by_power_of_two(int32_t val, int32_t exponent)
{
    const int32_t low_bits_mask = (1 << exponent) - 1;
    const int32_t quotient = val >> exponent;
    const int32_t round_up_above = (low_bits_mask >> 1) + (quotient < 0);

    return ((val & low_bits_mask) > round_up_above) ? quotient + 1 : quotient;
}
/**
 * Requantize `x` with a fixed point multiplier and a signed shift.
 *
 * A positive `shift` scales `x` up by 2^shift before the multiplication; a
 * zero or negative `shift` is applied afterwards as a rounding division by
 * 2^(-shift).
 */
__NN_FORCE_INLINE__ int32_t esp_nn_multiply_by_quantized_mult(int32_t x, int32_t mult, int32_t shift)
{
    int32_t pre_scaled = x;
    int32_t right_shift = 0;

    if (shift > 0) {
        pre_scaled = x * (1 << shift);
    } else {
        right_shift = -shift;
    }

    const int32_t high_mul = esp_nn_sat_round_doubling_high_mul(pre_scaled, mult);
    return esp_nn_div_by_power_of_two(high_mul, right_shift);
}
/**
* Faster variant of esp_nn_multiply_by_quantized_mult.
*
* Differences visible in the code: the nudge is always +2^30 (not sign
* dependent), the 64 bit product is narrowed with a plain arithmetic
* `>> 31` (no round-toward-zero correction and no saturation check), and
* the final right shift uses esp_nn_div_by_power_of_two_fast.
*/
__NN_FORCE_INLINE__ int32_t esp_nn_multiply_by_quantized_mult_fast(int32_t x, int32_t mult, int32_t shift)
{
int32_t left_shift = max(shift, 0);
// equals -shift for shift <= 0 and 0 for shift > 0
int32_t right_shift = left_shift - shift;
int64_t nudge_val = 1 << 30;
// NOTE(review): the left shift is performed in 32 bits before widening
int64_t in0_64 = (int64_t) (x << left_shift);
/* Multiply and add nudge */
int64_t mult_64 = in0_64 * mult + nudge_val;
int32_t result = (int32_t) (mult_64 >> 31);
// the fast divide requires a non-zero exponent, hence the guard
if (right_shift) {
result = esp_nn_div_by_power_of_two_fast(result, right_shift);
}
return result;
}
/**
 * Copy `src` into `dst`, surrounding it with padding on all four sides.
 *
 * `dst` must hold (input_wd + 2 * pad_wd) * (input_ht + 2 * pad_ht) * channels
 * bytes. It receives `pad_ht` padded rows, then every input row framed by
 * `pad_wd` padded pixels on each side, then `pad_ht` padded rows again.
 *
 * @param src       input data, input_ht rows of input_wd * channels bytes
 * @param dst       destination buffer (see size above)
 * @param input_wd  input width in pixels
 * @param input_ht  input height in rows
 * @param channels  bytes per pixel
 * @param pad_val   padding value (converted to a byte by memset)
 * @param pad_wd    padding pixels on the left and on the right
 * @param pad_ht    padding rows at the top and at the bottom
 */
static void esp_nn_aligned_s8_pad_with_value(const int8_t *src, int8_t *dst,
                                             const uint16_t input_wd,
                                             const uint16_t input_ht,
                                             const uint16_t channels,
                                             const int32_t pad_val,
                                             const uint16_t pad_wd,
                                             const uint16_t pad_ht)
{
    const int32_t padded_row_len = (pad_wd + input_wd + pad_wd) * channels;
    const int32_t side_pad_len = pad_wd * channels;
    const int32_t row_len = input_wd * channels;

    /* fill the whole padded buffer with pad_val, then overwrite the interior */
    memset(dst, pad_val, padded_row_len * (input_ht + 2 * pad_ht));

    /*
     * Skip ALL top padding rows. Skipping exactly one row is only correct
     * for pad_ht == 1; it misplaces the data for pad_ht > 1 and writes past
     * the end of the buffer for pad_ht == 0.
     */
    dst += padded_row_len * pad_ht;

    for (int i = 0; i < input_ht; i++) {
        dst += side_pad_len;             /* left padding, already filled */
        for (int j = 0; j < row_len; j++) {
            *dst++ = *src++;
        }
        dst += side_pad_len;             /* right padding, already filled */
    }
}
/**
 * Copy `src` into `dst`, padding only at the end of each row and after the
 * last row.
 *
 * Every input row is followed by `pad_wd` padded pixels; after the last row
 * `pad_ht` fully padded rows of (input_wd + pad_wd) pixels are appended.
 *
 * @param src       input data, input_ht rows of input_wd * channels bytes
 * @param dst       destination, (input_wd + pad_wd) * (input_ht + pad_ht) * channels bytes
 * @param input_wd  input width in pixels
 * @param input_ht  input height in rows
 * @param channels  bytes per pixel
 * @param pad_val   padding value (converted to a byte by memset)
 * @param pad_wd    padding pixels appended to each row
 * @param pad_ht    padding rows appended after the last row
 */
static void esp_nn_aligned_s8_pad_end_with_value(const int8_t *src, int8_t *dst,
                                                 const uint16_t input_wd,
                                                 const uint16_t input_ht,
                                                 const uint16_t channels,
                                                 const int32_t pad_val,
                                                 const uint16_t pad_wd,
                                                 const uint16_t pad_ht)
{
    const int row_len = input_wd * channels;
    const int row_pad_len = pad_wd * channels;

    for (int row = 0; row < input_ht; row++) {
        /* copy one input row */
        for (int k = 0; k < row_len; k++) {
            dst[k] = src[k];
        }
        src += row_len;
        dst += row_len;

        /* pad the end of the row */
        if (pad_wd) {
            memset(dst, pad_val, row_pad_len);
            dst += row_pad_len;
        }
    }

    /* append `pad_ht` padded rows after the data */
    if (pad_ht) {
        memset(dst, pad_val, (input_wd + pad_wd) * pad_ht * channels);
    }
}
/**
 * @brief convert 8 bit input data to 16 bit, adding an offset
 *
 * Elements are processed two at a time; a trailing element of an odd-sized
 * buffer is handled separately, so exactly `size` elements are read and
 * written.
 *
 * @param src int8_t source data
 * @param dst int16_t dst data
 * @param size length of data
 * @param offset offset to be added to src data. Range: [-128, 127]
 */
__NN_FORCE_INLINE__ void esp_nn_s8_to_s16_with_offset(const int8_t *src, int16_t *dst,
                                                      const int size, const int32_t offset)
{
    int i = 0;
    /*
     * Only enter the pair loop while a full pair remains. With `i < size`
     * an odd `size` would read src[size] and write dst[size].
     */
    for (; i + 1 < size; i += 2) {
        dst[i + 0] = src[i + 0] + offset;
        dst[i + 1] = src[i + 1] + offset;
    }
    /* leftover element when size is odd */
    if (i < size) {
        dst[i] = src[i] + offset;
    }
}
/**
 * @brief convert 8 bit input data to 16 bit
 *
 * Elements are processed two at a time; a trailing element of an odd-sized
 * buffer is handled separately, so exactly `size` elements are read and
 * written.
 *
 * @param src int8_t source data
 * @param dst int16_t dst data
 * @param size length of data
 */
__NN_FORCE_INLINE__ void esp_nn_s8_to_s16(const int8_t *src, int16_t *dst, const int size)
{
    int i = 0;
    /*
     * Only enter the pair loop while a full pair remains. With `i < size`
     * an odd `size` would read src[size] and write dst[size].
     */
    for (; i + 1 < size; i += 2) {
        dst[i + 0] = src[i + 0];
        dst[i + 1] = src[i + 1];
    }
    /* leftover element when size is odd */
    if (i < size) {
        dst[i] = src[i];
    }
}

View File

@@ -1,179 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <esp_nn_defs.h>
#include <common_functions.h>
/**
 * Scratch size query for the reference convolution.
 *
 * None of the arguments are inspected; the result is always 0.
 */
int esp_nn_get_conv_scratch_size_ansi(const data_dims_t *input_dims,
                                      const data_dims_t *filter_dims,
                                      const data_dims_t *output_dims,
                                      const conv_params_t *conv_params)
{
    (void) input_dims;
    (void) filter_dims;
    (void) output_dims;
    (void) conv_params;

    return 0;
}
/**
 * Scratch buffer setter for the reference convolution.
 *
 * Intentionally a no-op: `buf` is ignored.
 */
void esp_nn_set_conv_scratch_buf_ansi(const void *buf)
{
    (void) buf;
}
/**
* 2d convolution on uint8_t data (reference C version).
*
* Input and output channel counts are independent (`in_channels`,
* `out_channels`). A single out_mult/out_shift pair is used for all output
* channels.
*
* Layouts as indexed by the code:
*   input  : [y][x][in_ch]
*   filter : [out_ch][filter_y][filter_x][in_ch]
*   output : [y][x][out_ch]
*
* Assumption 1: Pointers are valid (`bias` may be NULL, it is checked)
* Assumption 2: dilation = 1 (there is no dilation parameter)
*/
void esp_nn_conv_u8_ansi(const uint8_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t in_channels,
const int32_t input_offset,
const uint16_t pad_wd,
const uint16_t pad_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const uint8_t *filter_data,
const uint16_t filter_wd,
const uint16_t filter_ht,
const int32_t filter_offset,
const int32_t *bias,
uint8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const uint16_t out_channels,
const int32_t out_offset,
const int32_t out_shift,
const int32_t out_mult,
const int32_t activation_min,
const int32_t activation_max)
{
for (int out_y = 0; out_y < out_ht; out_y++) { //height loop
// top-left input coordinate covered by the filter; negative inside the padding
const int16_t base_y = (out_y * stride_ht) - pad_ht;
for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop
const int16_t base_x = (out_x * stride_wd) - pad_wd;
for (int out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {//channel_loop
int32_t result = 0;
/* Select filter so as the point doesn't lie outside block */
int filter_y_start = max(0, -base_y);
int filter_x_start = max(0, -base_x);
int filter_y_end = min(filter_ht, input_ht - base_y);
int filter_x_end = min(filter_wd, input_wd - base_x);
for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
const int32_t idx_y = base_y + filter_y_idx;
for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
const int32_t idx_x = base_x + filter_x_idx;
for (int in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) {
int32_t input_index = (idx_y * input_wd + idx_x) * in_channels + in_ch_idx;
int32_t filter_index = ((out_ch_idx * filter_ht + filter_y_idx)
* filter_wd + filter_x_idx) * in_channels
+ in_ch_idx;
// both operands get their offset applied before multiplying
int32_t input_val = input_data[input_index] + input_offset;
int32_t filter_val = filter_data[filter_index] + filter_offset;
result += input_val * filter_val;
}
}
}
if (bias) {
result += bias[out_ch_idx];
}
// requantize, shift to the output zero point, then clamp
result = esp_nn_multiply_by_quantized_mult(result, out_mult, out_shift);
result += out_offset;
result = max(result, activation_min);
result = min(result, activation_max);
int out_index = (out_y * out_wd + out_x) * out_channels + out_ch_idx;
out_data[out_index] = (uint8_t) result;
}
}
}
}
/**
* 2d convolution on int8_t data with per output channel requantization
* (reference C version).
*
* Input and output channel counts are independent. The multiplier and shift
* are taken per output channel from `quant_data`. Only the input gets an
* offset; filter values are used as they are.
*
* Layouts as indexed by the code:
*   input  : [y][x][in_ch]
*   filter : [out_ch][filter_y][filter_x][in_ch]
*   output : written sequentially in [y][x][out_ch] order
*
* Assumption 1: Pointers are valid (`bias` may be NULL, it is checked)
* Assumption 2: dilation = 1 (`conv_params->dilation` is never read)
*
* @note Dimensions, padding and stride are narrowed to uint16_t.
*/
void esp_nn_conv_s8_ansi(const data_dims_t *input_dims,
const int8_t *input_data,
const data_dims_t *filter_dims,
const int8_t *filter_data,
const int32_t *bias,
const data_dims_t *output_dims,
int8_t *out_data,
const conv_params_t *conv_params,
const quant_data_t *quant_data)
{
// unpack the descriptor structures into locals
const uint16_t input_wd = input_dims->width;
const uint16_t input_ht = input_dims->height;
const uint16_t in_channels = input_dims->channels;
const int32_t input_offset = conv_params->in_offset;
const int32_t out_offset = conv_params->out_offset;
const uint16_t pad_wd = conv_params->padding.width;
const uint16_t pad_ht = conv_params->padding.height;
const uint16_t stride_wd = conv_params->stride.width;
const uint16_t stride_ht = conv_params->stride.height;
const uint16_t filter_wd = filter_dims->width;
const uint16_t filter_ht = filter_dims->height;
const uint16_t out_wd = output_dims->width;
const uint16_t out_ht = output_dims->height;
const uint16_t out_channels = output_dims->channels;
const int32_t *out_shift = quant_data->shift;
const int32_t *out_mult = quant_data->mult;
const int32_t activation_min = conv_params->activation.min;
const int32_t activation_max = conv_params->activation.max;
int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx;
for (out_y = 0; out_y < out_ht; out_y++) {
for (out_x = 0; out_x < out_wd; out_x++) {
for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
int32_t conv_out = 0;
// top-left input coordinate covered by the filter; negative inside the padding
const int32_t base_y = stride_ht * out_y - pad_ht;
const int32_t base_x = stride_wd * out_x - pad_wd;
// restrict the filter window to taps that fall inside the input
const int32_t filter_y_start = max(0, -base_y);
const int32_t filter_x_start = max(0, -base_x);
const int32_t filter_y_end = min(filter_ht, input_ht - base_y);
const int32_t filter_x_end = min(filter_wd, input_wd - base_x);
for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
const int32_t in_row = base_y + filter_y_idx;
const int32_t in_col = base_x + filter_x_idx;
int32_t input_base_offset = (in_row * input_wd + in_col) * in_channels;
int32_t filter_base_offset = out_ch_idx * in_channels * filter_ht * filter_wd +
(filter_y_idx * filter_wd + filter_x_idx) * in_channels;
for (in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) {
conv_out +=
(input_data[input_base_offset + in_ch_idx] + input_offset) *
filter_data[filter_base_offset + in_ch_idx];
}
}
}
if (bias) {
conv_out += bias[out_ch_idx];
}
// per channel requantization, shift to the output zero point, then clamp
conv_out = esp_nn_multiply_by_quantized_mult(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]);
conv_out += out_offset;
conv_out = max(conv_out, activation_min);
conv_out = min(conv_out, activation_max);
*out_data++ = (int8_t) conv_out;
}
}
}
}

View File

@@ -1,463 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <esp_nn_defs.h>
#include <common_functions.h>
/*
* File-local scratch memory pointer, initially NULL.
* NOTE(review): the code that assigns and uses it lies outside this view.
*/
static int16_t *scratch_buffer = NULL;
/*
* Externally defined convolution kernel working on int8_t input with a
* per channel shift/multiplier and a caller supplied scratch buffer.
* NOTE(review): judging by the name this is the 1x1 filter case for channel
* counts that are a multiple of 8; the definition is not in this file --
* confirm the exact preconditions there.
*/
extern void esp_nn_conv_s8_mult8_1x1_esp32s3(const int8_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t in_channels,
const int32_t input_offset,
const int8_t *filter_aligned,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const uint16_t out_channels,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max,
void *buffer /* scratch buffer */);
/*
* Externally defined convolution kernel working on int16_t input and filter
* data with int8_t output. There is no input offset parameter.
* NOTE(review): presumably the offset has to be applied while widening the
* input to 16 bit, and the name suggests the 1x1 filter case for channel
* counts that are a multiple of 4; the definition is not in this file --
* confirm there.
*/
extern void esp_nn_conv_s16_mult4_1x1_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t in_channels,
const int16_t *filter_data,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const uint16_t out_channels,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max,
void *buffer /* scratch buffer */);
/*
* Externally defined general convolution kernel (padding, stride and filter
* size are parameters) working on int16_t input and filter data with int8_t
* output. It takes neither an input offset nor a scratch buffer.
* NOTE(review): the name suggests channel counts must be a multiple of 8;
* the definition is not in this file -- confirm there.
*/
extern void esp_nn_conv_s16_mult8_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t in_channels,
const uint16_t pad_wd,
const uint16_t pad_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const int16_t *filter_data,
const uint16_t filter_wd,
const uint16_t filter_ht,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const uint16_t out_channels,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max);
/*
* Externally defined int8_t -> int16_t conversion helpers, with and without
* an added offset. Their signatures match esp_nn_s8_to_s16_with_offset and
* esp_nn_s8_to_s16 from common_functions.h.
* NOTE(review): the "aligned" in the first name suggests alignment
* requirements on src/dst; the definitions are not in this file -- confirm.
*/
extern void esp_nn_aligned_s8_to_s16_with_offset_esp32s3(const int8_t *src, int16_t *dst,
const int size, const int32_t offset);
extern void esp_nn_s8_to_s16_esp32s3(const int8_t *src, int16_t *dst, const int size);
/*
* General int8_t convolution in plain C, handling padding and stride.
*
* The loop nest is the same as in esp_nn_conv_s8_ansi; the difference is
* that requantization uses esp_nn_multiply_by_quantized_mult_fast.
* `conv_params->dilation` is never read, i.e. dilation is always 1.
*
* Layouts as indexed by the code:
*   input  : [y][x][in_ch]
*   filter : [out_ch][filter_y][filter_x][in_ch]
*   output : written sequentially in [y][x][out_ch] order
*
* NOTE(review): despite the name, no loop in this body is manually unrolled.
*/
static void esp_nn_conv_s8_unrolled(const data_dims_t *input_dims,
const int8_t *input_data,
const data_dims_t *filter_dims,
const int8_t *filter_data,
const int32_t *bias,
const data_dims_t *output_dims,
int8_t *out_data,
const conv_params_t *conv_params,
const quant_data_t *quant_data)
{
// unpack the descriptor structures into locals (narrowed to uint16_t)
const uint16_t input_wd = input_dims->width;
const uint16_t input_ht = input_dims->height;
const uint16_t in_ch = input_dims->channels;
const int32_t input_offset = conv_params->in_offset;
const int32_t out_offset = conv_params->out_offset;
const uint16_t pad_wd = conv_params->padding.width;
const uint16_t pad_ht = conv_params->padding.height;
const uint16_t stride_wd = conv_params->stride.width;
const uint16_t stride_ht = conv_params->stride.height;
const uint16_t filter_wd = filter_dims->width;
const uint16_t filter_ht = filter_dims->height;
const uint16_t out_wd = output_dims->width;
const uint16_t out_ht = output_dims->height;
const uint16_t out_ch = output_dims->channels;
const int32_t *out_shift = quant_data->shift;
const int32_t *out_mult = quant_data->mult;
const int32_t activation_min = conv_params->activation.min;
const int32_t activation_max = conv_params->activation.max;
int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx;
for (out_y = 0; out_y < out_ht; out_y++) {
for (out_x = 0; out_x < out_wd; out_x++) {
for (out_ch_idx = 0; out_ch_idx < out_ch; out_ch_idx++) {
int32_t conv_out = 0;
// top-left input coordinate covered by the filter; negative inside the padding
const int32_t base_y = stride_ht * out_y - pad_ht;
const int32_t base_x = stride_wd * out_x - pad_wd;
// restrict the filter window to taps that fall inside the input
const int32_t filter_y_start = max(0, -base_y);
const int32_t filter_x_start = max(0, -base_x);
const int32_t filter_y_end = min(filter_ht, input_ht - base_y);
const int32_t filter_x_end = min(filter_wd, input_wd - base_x);
for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
const int32_t in_row = base_y + filter_y_idx;
const int32_t in_col = base_x + filter_x_idx;
int32_t input_base_offset = (in_row * input_wd + in_col) * in_ch;
int32_t filter_base_offset = out_ch_idx * in_ch * filter_ht * filter_wd +
(filter_y_idx * filter_wd + filter_x_idx) * in_ch;
for (in_ch_idx = 0; in_ch_idx < in_ch; in_ch_idx++) {
conv_out +=
(input_data[input_base_offset + in_ch_idx] + input_offset) *
filter_data[filter_base_offset + in_ch_idx];
}
}
}
if (bias) {
conv_out += bias[out_ch_idx];
}
// fast per channel requantization, shift to the output zero point, then clamp
conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]);
conv_out += out_offset;
conv_out = max(conv_out, activation_min);
conv_out = min(conv_out, activation_max);
*out_data++ = (int8_t) conv_out;
}
}
}
}
/**
 * int8_t convolution without padding: every filter tap is read from inside
 * the input, so no window clipping is done.
 *
 * Layouts as indexed by the code:
 *   input  : [y][x][in_ch]
 *   filter : [out_ch][filter_y][filter_x][in_ch]
 *   output : written sequentially in [y][x][out_ch] order
 *
 * Requantization is per output channel (out_mult / out_shift arrays) and
 * uses the fast multiplier; `bias` may be NULL.
 */
static void esp_nn_conv_s8_pad_valid(const int8_t *input_data,
                                     const uint16_t input_wd,
                                     const uint16_t input_ht,
                                     const uint16_t in_channels,
                                     const int32_t input_offset,
                                     const uint16_t stride_wd,
                                     const uint16_t stride_ht,
                                     const int8_t *filter_data,
                                     const uint16_t filter_wd,
                                     const uint16_t filter_ht,
                                     const int32_t *bias,
                                     int8_t *out_data,
                                     const uint16_t out_wd,
                                     const uint16_t out_ht,
                                     const uint16_t out_channels,
                                     const int32_t out_offset,
                                     const int32_t *out_shift,
                                     const int32_t *out_mult,
                                     const int32_t activation_min,
                                     const int32_t activation_max)
{
    int8_t *out_ptr = out_data;

    for (int32_t oy = 0; oy < out_ht; oy++) {
        const int32_t in_y0 = stride_ht * oy;

        for (int32_t ox = 0; ox < out_wd; ox++) {
            const int32_t in_x0 = stride_wd * ox;

            for (int32_t oc = 0; oc < out_channels; oc++) {
                int32_t acc = 0;

                for (int32_t fy = 0; fy < filter_ht; fy++) {
                    for (int32_t fx = 0; fx < filter_wd; fx++) {
                        /* start of the channel vectors for this tap */
                        const int32_t in_start = ((in_y0 + fy) * input_wd + (in_x0 + fx)) * in_channels;
                        const int32_t flt_start = oc * in_channels * filter_ht * filter_wd +
                                                  (fy * filter_wd + fx) * in_channels;
                        const int8_t *in_px = input_data + in_start;
                        const int8_t *flt_px = filter_data + flt_start;

                        for (int32_t ic = 0; ic < in_channels; ic++) {
                            acc += (in_px[ic] + input_offset) * flt_px[ic];
                        }
                    }
                }

                if (bias) {
                    acc += bias[oc];
                }

                /* requantize, move to the output zero point, clamp */
                acc = esp_nn_multiply_by_quantized_mult_fast(acc, out_mult[oc], out_shift[oc]);
                acc += out_offset;
                acc = max(acc, activation_min);
                acc = min(acc, activation_max);

                *out_ptr++ = (int8_t) acc;
            }
        }
    }
}
/**
 * Plain-C int8 convolution with no padding and a filter fixed at 3x3.
 *
 * Same arithmetic as the generic no-padding kernel, with the filter
 * dimensions hard-coded to 3: accumulate (input + input_offset) * filter
 * over the window and all input channels, add the optional bias, requantize
 * per output channel, add out_offset and clamp to
 * [activation_min, activation_max].
 *
 * input_ht is part of the signature but is not read here; the window is
 * never clipped, so it must always lie inside the input.
 *
 * NOTE(review): esp_nn_conv_s8_esp32s3 in this file does not appear to call
 * this variant - confirm whether it is still needed.
 */
static void esp_nn_conv_s8_pad_valid_3x3(const int8_t *input_data,
                                         const uint16_t input_wd,
                                         const uint16_t input_ht,
                                         const uint16_t in_channels,
                                         const int32_t input_offset,
                                         const uint16_t stride_wd,
                                         const uint16_t stride_ht,
                                         const int8_t *filter_data,
                                         const int32_t *bias,
                                         int8_t *out_data,
                                         const uint16_t out_wd,
                                         const uint16_t out_ht,
                                         const uint16_t out_channels,
                                         const int32_t out_offset,
                                         const int32_t *out_shift,
                                         const int32_t *out_mult,
                                         const int32_t activation_min,
                                         const int32_t activation_max)
{
    for (int32_t oy = 0; oy < out_ht; oy++) {
        for (int32_t ox = 0; ox < out_wd; ox++) {
            /* top-left corner of the 3x3 window in input coordinates */
            const int32_t row0 = stride_ht * oy;
            const int32_t col0 = stride_wd * ox;

            for (int32_t oc = 0; oc < out_channels; oc++) {
                /* 3 * 3 * in_channels taps per output channel */
                const int8_t *oc_filter = filter_data + oc * in_channels * 3 * 3;
                int32_t acc = 0;

                for (int32_t fy = 0; fy < 3; fy++) {
                    for (int32_t fx = 0; fx < 3; fx++) {
                        const int32_t in_off = ((row0 + fy) * input_wd + (col0 + fx)) * in_channels;
                        const int32_t flt_off = (fy * 3 + fx) * in_channels;
                        const int8_t *in_p = input_data + in_off;
                        const int8_t *flt_p = oc_filter + flt_off;
                        for (int32_t c = 0; c < in_channels; c++) {
                            acc += (in_p[c] + input_offset) * flt_p[c];
                        }
                    }
                }

                if (bias) {
                    acc += bias[oc];
                }
                acc = esp_nn_multiply_by_quantized_mult_fast(acc, out_mult[oc], out_shift[oc]);
                acc += out_offset;
                acc = max(acc, activation_min);
                acc = min(acc, activation_max);
                *out_data++ = (int8_t) acc;
            }
        }
    }
}
/**
 * Int8 convolution with no padding, specialised for a 3x3 filter over
 * exactly 3 input channels (27 taps per output channel).
 *
 * Instead of adding input_offset to every input sample, the kernel relies on
 *   sum((in + off) * w) == sum(in * w) + off * sum(w)
 * The per-output-channel filter sums are computed once up front and stored
 * as int16 at the start of the file-level scratch_buffer; the main loop then
 * accumulates raw input * filter products and adds sum * input_offset.
 *
 * scratch_buffer is dereferenced without a NULL check here, so it must have
 * been set and be able to hold out_channels int16 values. input_ht is part
 * of the signature but is not read.
 */
static void esp_nn_conv_s8_pad_valid_ch3_3x3(const int8_t *input_data,
                                             const uint16_t input_wd,
                                             const uint16_t input_ht,
                                             const int32_t input_offset,
                                             const uint16_t stride_wd,
                                             const uint16_t stride_ht,
                                             const int8_t *filter_data,
                                             const int32_t *bias,
                                             int8_t *out_data,
                                             const uint16_t out_wd,
                                             const uint16_t out_ht,
                                             const uint16_t out_channels,
                                             const int32_t out_offset,
                                             const int32_t *out_shift,
                                             const int32_t *out_mult,
                                             const int32_t activation_min,
                                             const int32_t activation_max)
{
    /* Pass 1: per-output-channel sum of the 27 filter taps. */
    int16_t *tap_sums = (int16_t *) scratch_buffer;
    const int8_t *tap = filter_data;
    for (int32_t oc = 0; oc < out_channels; oc++) {
        int16_t total = 0;
        for (int k = 0; k < 27; k++) {
            total += *tap++;
        }
        tap_sums[oc] = total;
    }

    /* Pass 2: the convolution itself. */
    for (int32_t oy = 0; oy < out_ht; oy++) {
        const int32_t row0 = stride_ht * oy;
        for (int32_t ox = 0; ox < out_wd; ox++) {
            const int32_t col0 = stride_wd * ox;
            /* first sample of the 3x3x3 window for this output pixel */
            const int8_t *window = input_data + (row0 * input_wd + col0) * 3;
            /* filter taps are consumed strictly in storage order */
            const int8_t *flt = filter_data;

            for (int32_t oc = 0; oc < out_channels; oc++) {
                int32_t acc = 0;
                for (int32_t fy = 0; fy < 3; fy++) {
                    /* one window row = 3 pixels * 3 channels = 9 contiguous samples */
                    const int8_t *in_row = window + (fy * input_wd) * 3;
                    acc += in_row[0] * flt[0];
                    acc += in_row[1] * flt[1];
                    acc += in_row[2] * flt[2];
                    acc += in_row[3] * flt[3];
                    acc += in_row[4] * flt[4];
                    acc += in_row[5] * flt[5];
                    acc += in_row[6] * flt[6];
                    acc += in_row[7] * flt[7];
                    acc += in_row[8] * flt[8];
                    flt += 9;
                }
                /* deferred input_offset contribution */
                acc += tap_sums[oc] * input_offset;
                if (bias) {
                    acc += bias[oc];
                }
                acc = esp_nn_multiply_by_quantized_mult_fast(acc, out_mult[oc], out_shift[oc]);
                acc += out_offset;
                acc = max(acc, activation_min);
                acc = min(acc, activation_max);
                *out_data++ = (int8_t) acc;
            }
        }
    }
}
/**
 * Compute how much scratch memory esp_nn_conv_s8_esp32s3 needs for the
 * given layer (presumably in bytes - the 1x1 path copies filter_size int8
 * values and the other paths store 16-bit copies; confirm against callers).
 *
 * Two cases:
 *  - in_ch multiple of 8, 1x1 filter, no padding, unit stride:
 *      filter_size + transpose buffer + alignment slack
 *  - everything else:
 *      2 * (filter_size + input_size) + transpose buffer + alignment slack
 *
 * The transpose buffer is 2 * 8 * in_ch, or 0 when the input has fewer than
 * 8 pixels. Alignment slack is a fixed 32.
 */
int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
                                         const data_dims_t *filter_dims,
                                         const data_dims_t *output_dims,
                                         const conv_params_t *conv_params)
{
    const uint16_t in_w = input_dims->width;
    const uint16_t in_h = input_dims->height;
    const uint16_t in_ch = input_dims->channels;
    const uint16_t flt_w = filter_dims->width;
    const uint16_t flt_h = filter_dims->height;
    const uint16_t out_ch = output_dims->channels;
    const uint16_t pad_w = conv_params->padding.width;
    const uint16_t pad_h = conv_params->padding.height;
    const uint16_t stride_w = conv_params->stride.width;
    const uint16_t stride_h = conv_params->stride.height;

    const int filter_size = flt_w * flt_h * in_ch * out_ch;
    const int input_size = in_w * in_h * in_ch;
    const int align_buf_size = 32; /* extra buffer for alignment */

    /* intermediate data store; not used when only leftover pixels exist */
    const int transpose_buf_size = (in_w * in_h < 8) ? 0 : 2 * (8 * in_ch);

    const int is_unit_1x1 = (flt_w == 1) && (flt_h == 1) &&
                            (pad_w == 0) && (pad_h == 0) &&
                            (stride_w == 1) && (stride_h == 1);

    if ((in_ch % 8 == 0) && is_unit_1x1) {
        return filter_size + transpose_buf_size + align_buf_size;
    }
    return 2 * (filter_size + input_size) + transpose_buf_size + align_buf_size;
}
/**
 * Register the scratch memory used by the ESP32-S3 convolution kernels.
 *
 * The pointer is stored in the file-level scratch_buffer as-is; nothing is
 * copied or validated here.
 */
void esp_nn_set_conv_scratch_buf_esp32s3(void *buf)
{
    int16_t *as_s16 = (int16_t *) buf;
    scratch_buffer = as_s16;
}
/**
 * Int8 convolution entry point for ESP32-S3: picks a kernel based on the
 * layer shape and forwards the unpacked parameters to it.
 *
 * Selection order (first match wins):
 *  1. channels % 8 == 0, 1x1 filter, no padding, unit stride
 *       -> esp_nn_conv_s8_mult8_1x1_esp32s3 (filter copied into scratch)
 *  2. channels % 4 == 0, 1x1 filter, no padding, unit stride and
 *     input_wd * input_ht a multiple of 4
 *       -> esp_nn_conv_s16_mult4_1x1_esp32s3 (filter/input widened to s16)
 *  3. channels % 8 == 0
 *       -> esp_nn_conv_s16_mult8_esp32s3 (filter/input widened to s16)
 *  4. no padding
 *       -> esp_nn_conv_s8_pad_valid_ch3_3x3 for 3x3 filters on 3 channels,
 *          otherwise esp_nn_conv_s8_pad_valid
 *  5. anything else
 *       -> esp_nn_conv_s8_unrolled
 *
 * The scratch buffer must have been registered with
 * esp_nn_set_conv_scratch_buf_esp32s3 beforehand; if it has not, an error is
 * printed and the function returns without writing any output.
 */
void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims,
                            const int8_t *input,
                            const data_dims_t *filter_dims,
                            const int8_t *filter_data,
                            const int32_t *bias,
                            const data_dims_t *output_dims,
                            int8_t *out_data,
                            const conv_params_t *conv_params,
                            const quant_data_t *quant_data)
{
    /* Validate before deriving any pointer from scratch_buffer: arithmetic
     * on a null pointer is undefined behaviour. */
    if (scratch_buffer == NULL) {
        printf("esp_nn_conv error! scratch_buffer not set!\n");
        return;
    }

    const uint16_t input_wd = input_dims->width;
    const uint16_t input_ht = input_dims->height;
    const uint16_t channels = input_dims->channels;
    const int32_t input_offset = conv_params->in_offset;
    const int32_t out_offset = conv_params->out_offset;
    const uint16_t pad_wd = conv_params->padding.width;
    const uint16_t pad_ht = conv_params->padding.height;
    const uint16_t stride_wd = conv_params->stride.width;
    const uint16_t stride_ht = conv_params->stride.height;
    const uint16_t filter_wd = filter_dims->width;
    const uint16_t filter_ht = filter_dims->height;
    const uint16_t out_wd = output_dims->width;
    const uint16_t out_ht = output_dims->height;
    const uint16_t out_channels = output_dims->channels;
    const int32_t *out_shift = quant_data->shift;
    const int32_t *out_mult = quant_data->mult;
    const int32_t activation_min = conv_params->activation.min;
    const int32_t activation_max = conv_params->activation.max;

    int filter_size = filter_wd * filter_ht * channels * out_channels;
    int input_size = input_wd * input_ht * channels;

    /* Scratch layout for the s16 paths: widened filter first, then a gap of
     * 1..16 elements, then the widened input. */
    int align_len = 16 - (filter_size & 15);
    int16_t *filter_data16 = scratch_buffer;
    int16_t *input_data16 = scratch_buffer + filter_size + align_len;

    if (channels % 8 == 0 && filter_wd == 1 && filter_ht == 1 &&
            pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) {
        int8_t *filter_aligned = (int8_t *) scratch_buffer;
        /* uintptr_t rather than int so the address is never truncated or
         * made negative by the cast; advances by 1..16 bytes to the next
         * 16-byte boundary. */
        uintptr_t scratch_end = (uintptr_t) (filter_aligned + filter_size);
        void *scratch_buf = (void *) (scratch_end + 16 - (scratch_end & 15));
        memcpy(filter_aligned, filter_data, filter_size); // copy to aligned address
        esp_nn_conv_s8_mult8_1x1_esp32s3(
            input, input_wd, input_ht, channels, input_offset, filter_aligned,
            bias, out_data, out_wd, out_ht, out_channels, out_offset,
            out_shift, out_mult, activation_min, activation_max, scratch_buf);
    } else if (channels % 4 == 0 && filter_wd == 1 && filter_ht == 1 &&
            (input_wd * input_ht) % 4 == 0 &&  /* TODO: remove this check */
            pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) {
        /* remaining scratch starts at the 16-byte boundary after the s16 input */
        uintptr_t scratch_end = (uintptr_t) (input_data16 + input_size);
        void *scratch_buf = (void *) (scratch_end + 16 - (scratch_end & 15));
        esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
        esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input, input_data16, input_size, input_offset);
        esp_nn_conv_s16_mult4_1x1_esp32s3(
            input_data16, input_wd, input_ht, channels, filter_data16,
            bias, out_data, out_wd, out_ht, out_channels, out_offset,
            out_shift, out_mult, activation_min, activation_max, scratch_buf);
    } else if (channels % 8 == 0) {
        esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
        esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input, input_data16, input_size, input_offset);
        esp_nn_conv_s16_mult8_esp32s3(
            input_data16, input_wd, input_ht, channels, pad_wd, pad_ht,
            stride_wd, stride_ht, filter_data16, filter_wd, filter_ht, bias,
            out_data, out_wd, out_ht, out_channels, out_offset, out_shift,
            out_mult, activation_min, activation_max);
    } else if (pad_wd == 0 && pad_ht == 0) {
        if (filter_wd == 3 && filter_ht == 3 && channels == 3) {
            esp_nn_conv_s8_pad_valid_ch3_3x3(input, input_wd, input_ht, input_offset,
                                             stride_wd, stride_ht, filter_data, bias,
                                             out_data, out_wd, out_ht, out_channels, out_offset,
                                             out_shift, out_mult, activation_min, activation_max);
        } else {
            esp_nn_conv_s8_pad_valid(input, input_wd, input_ht, channels, input_offset,
                                     stride_wd, stride_ht, filter_data, filter_wd, filter_ht, bias,
                                     out_data, out_wd, out_ht, out_channels, out_offset, out_shift,
                                     out_mult, activation_min, activation_max);
        }
    } else {
        /* Basic unrolled version */
        esp_nn_conv_s8_unrolled(input_dims, input, filter_dims, filter_data,
                                bias, output_dims, out_data, conv_params, quant_data);
    }
}

View File

@@ -1,179 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <esp_nn_defs.h>
#include <common_functions.h>
/**
 * Scratch-size query for the generic optimised convolution.
 *
 * Always returns 0; none of the arguments are inspected.
 */
int esp_nn_get_conv_scratch_size_opt(const data_dims_t *input_dims,
                                     const data_dims_t *filter_dims,
                                     const data_dims_t *output_dims,
                                     const conv_params_t *conv_params)
{
    (void) input_dims;
    (void) filter_dims;
    (void) output_dims;
    (void) conv_params;
    return 0;
}
/**
 * Scratch-buffer setter for the generic optimised convolution.
 *
 * Deliberately a no-op: the pointer is neither stored nor dereferenced.
 */
void esp_nn_set_conv_scratch_buf_opt(const void *buf)
{
    (void) buf;
}
/**
 * Int8 convolution for a 1x1 filter.
 *
 * Walks the input at the configured strides; for every visited pixel and
 * every output channel it accumulates (input + in_offset) * filter over the
 * input channels (unrolled by four, with a scalar tail), adds the optional
 * bias, requantizes with that channel's multiplier/shift, adds out_offset
 * and clamps to the activation range.
 *
 * Filter indexing is [out_channel][in_channel]. Padding from conv_params is
 * not read by this function.
 */
__attribute__ ((noinline))
static void esp_nn_conv_s8_1x1(const data_dims_t *input_dims,
                               const int8_t *input_data,
                               const int8_t *filter_data,
                               const int32_t *bias,
                               const data_dims_t *output_dims,
                               int8_t *out_data,
                               const conv_params_t *conv_params,
                               const quant_data_t *quant_data)
{
    const uint16_t input_wd = input_dims->width;
    const uint16_t in_channels = input_dims->channels;
    const int32_t input_offset = conv_params->in_offset;
    const int32_t out_offset = conv_params->out_offset;
    const uint16_t stride_wd = conv_params->stride.width;
    const uint16_t stride_ht = conv_params->stride.height;
    const uint16_t out_wd = output_dims->width;
    const uint16_t out_ht = output_dims->height;
    const uint16_t out_channels = output_dims->channels;
    const int32_t activation_min = conv_params->activation.min;
    const int32_t activation_max = conv_params->activation.max;

    /* The loops run directly over input coordinates, one step per output. */
    for (int32_t in_row = 0; in_row < out_ht * stride_ht; in_row += stride_ht) {
        for (int32_t in_col = 0; in_col < out_wd * stride_wd; in_col += stride_wd) {
            const int32_t *mult = quant_data->mult;
            const int32_t *shift = quant_data->shift;
            const int8_t *pixel = input_data + (in_row * input_wd + in_col) * in_channels;
            /* advanced by in_channels after every output channel */
            const int8_t *weights = filter_data;

            for (int32_t oc = 0; oc < out_channels; oc++) {
                int32_t acc = 0;
                int32_t c = 0;

                /* four input channels per iteration */
                for (; c < in_channels - 3; c += 4) {
                    acc += (pixel[c + 0] + input_offset) * weights[c + 0];
                    acc += (pixel[c + 1] + input_offset) * weights[c + 1];
                    acc += (pixel[c + 2] + input_offset) * weights[c + 2];
                    acc += (pixel[c + 3] + input_offset) * weights[c + 3];
                }
                /* leftover channels */
                for (; c < in_channels; c++) {
                    acc += (pixel[c] + input_offset) * weights[c];
                }
                weights += in_channels;

                if (bias) {
                    acc += bias[oc];
                }
                acc = esp_nn_multiply_by_quantized_mult_fast(acc, mult[oc], shift[oc]);
                acc += out_offset;
                acc = max(acc, activation_min);
                acc = min(acc, activation_max);
                *out_data++ = (int8_t) acc;
            }
        }
    }
}
/**
 * Generic int8 convolution with padding support.
 *
 * 1x1 filters are handed to esp_nn_conv_s8_1x1. For everything else the
 * filter window is clipped to the input bounds for each output pixel, so
 * taps that would fall into the padding region are skipped rather than
 * read. The input-channel loop is unrolled by four with a scalar tail.
 *
 * Assumption 1: i/p channels == o/p channels
 * Assumption 2: Pointers are valid
 * Assumption 3: dilation width = 1
 *
 * NOTE(review): the loops below index input and output channels
 * independently, so assumption 1 may be a leftover - confirm.
 */
void esp_nn_conv_s8_opt(const data_dims_t *input_dims,
                        const int8_t *input_data,
                        const data_dims_t *filter_dims,
                        const int8_t *filter_data,
                        const int32_t *bias,
                        const data_dims_t *output_dims,
                        int8_t *out_data,
                        const conv_params_t *conv_params,
                        const quant_data_t *quant_data)
{
    const uint16_t filter_wd = filter_dims->width;
    const uint16_t filter_ht = filter_dims->height;

    /* dedicated kernel for pointwise convolution */
    if (filter_wd == 1 && filter_ht == 1) {
        esp_nn_conv_s8_1x1(input_dims, input_data, filter_data, bias,
                           output_dims, out_data, conv_params, quant_data);
        return;
    }

    const uint16_t input_wd = input_dims->width;
    const uint16_t input_ht = input_dims->height;
    const uint16_t in_channels = input_dims->channels;
    const int32_t input_offset = conv_params->in_offset;
    const int32_t out_offset = conv_params->out_offset;
    const uint16_t pad_wd = conv_params->padding.width;
    const uint16_t pad_ht = conv_params->padding.height;
    const uint16_t stride_wd = conv_params->stride.width;
    const uint16_t stride_ht = conv_params->stride.height;
    const uint16_t out_wd = output_dims->width;
    const uint16_t out_ht = output_dims->height;
    const uint16_t out_channels = output_dims->channels;
    const int32_t activation_min = conv_params->activation.min;
    const int32_t activation_max = conv_params->activation.max;

    for (int32_t oy = 0; oy < out_ht; oy++) {
        /* window origin in input coordinates; negative inside the top padding */
        const int32_t base_y = stride_ht * oy - pad_ht;
        const int32_t fy_begin = max(0, -base_y);
        const int32_t fy_end = min(filter_ht, input_ht - base_y);

        for (int32_t ox = 0; ox < out_wd; ox++) {
            const int32_t base_x = stride_wd * ox - pad_wd;
            const int32_t fx_begin = max(0, -base_x);
            const int32_t fx_end = min(filter_wd, input_wd - base_x);
            const int32_t *shift = quant_data->shift;
            const int32_t *mult = quant_data->mult;

            for (int32_t oc = 0; oc < out_channels; oc++) {
                /* filter layout: [out_channel][filter_row][filter_col][in_channel] */
                const int8_t *oc_filter = filter_data + oc * in_channels * filter_ht * filter_wd;
                int32_t acc = 0;

                for (int32_t fy = fy_begin; fy < fy_end; fy++) {
                    for (int32_t fx = fx_begin; fx < fx_end; fx++) {
                        const int32_t in_row = base_y + fy;
                        const int32_t in_col = base_x + fx;
                        const int8_t *in_p = input_data + (in_row * input_wd + in_col) * in_channels;
                        const int8_t *flt_p = oc_filter + (fy * filter_wd + fx) * in_channels;
                        int32_t c = 0;

                        for (; c < in_channels - 3; c += 4) {
                            acc += (in_p[c + 0] + input_offset) * flt_p[c + 0];
                            acc += (in_p[c + 1] + input_offset) * flt_p[c + 1];
                            acc += (in_p[c + 2] + input_offset) * flt_p[c + 2];
                            acc += (in_p[c + 3] + input_offset) * flt_p[c + 3];
                        }
                        for (; c < in_channels; c++) {
                            acc += (in_p[c] + input_offset) * flt_p[c];
                        }
                    }
                }

                if (bias) {
                    acc += bias[oc];
                }
                acc = esp_nn_multiply_by_quantized_mult_fast(acc, mult[oc], shift[oc]);
                acc += out_offset;
                acc = max(acc, activation_min);
                acc = min(acc, activation_max);
                *out_data++ = (int8_t) acc;
            }
        }
    }
}

View File

@@ -1,100 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <esp_nn_defs.h>
#include <common_functions.h>
/**
 * Scratch-size query for the ANSI-C depthwise convolution.
 *
 * Always returns 0; none of the arguments are inspected.
 */
int esp_nn_get_depthwise_conv_scratch_size_ansi(const data_dims_t *input_dims,
                                                const data_dims_t *filter_dims,
                                                const data_dims_t *output_dims,
                                                const dw_conv_params_t *conv_params)
{
    (void) input_dims;
    (void) filter_dims;
    (void) output_dims;
    (void) conv_params;
    return 0;
}
/**
 * Scratch-buffer setter for the ANSI-C depthwise convolution.
 *
 * Deliberately a no-op: the pointer is neither stored nor dereferenced.
 */
void esp_nn_set_depthwise_conv_scratch_buf_ansi(const void *buf)
{
    (void) buf;
}
/**
 * Reference (plain C) int8 depthwise convolution.
 *
 * Each input channel produces ch_mult output channels; output channel
 * index is ch_idx * ch_mult + ch_mult_idx. For every output element the
 * filter window is clipped to the input bounds, (input + in_offset) * filter
 * is accumulated over the remaining taps, the optional bias is added, the
 * sum is requantized with esp_nn_multiply_by_quantized_mult using that
 * channel's multiplier/shift, out_offset is added and the result is clamped
 * to the activation range.
 *
 * Indexing used below:
 *   input  : [row][col][channel]
 *   filter : [filter_row][filter_col][out_channel]
 *   output : written sequentially as [row][col][out_channel]
 */
void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims,
                                   const int8_t *input_data,
                                   const data_dims_t *filter_dims,
                                   const int8_t *filter_data,
                                   const int32_t *bias,
                                   const data_dims_t *output_dims,
                                   int8_t *out_data,
                                   const dw_conv_params_t *conv_params,
                                   const quant_data_t *quant_data)
{
    const uint16_t input_wd = input_dims->width;
    const uint16_t input_ht = input_dims->height;
    const uint16_t channels = input_dims->channels;
    const int32_t input_offset = conv_params->in_offset;
    const int32_t out_offset = conv_params->out_offset;
    const uint16_t pad_wd = conv_params->padding.width;
    const uint16_t pad_ht = conv_params->padding.height;
    const uint16_t stride_wd = conv_params->stride.width;
    const uint16_t stride_ht = conv_params->stride.height;
    const uint16_t filter_wd = filter_dims->width;
    const uint16_t filter_ht = filter_dims->height;
    const uint16_t out_wd = output_dims->width;
    const uint16_t out_ht = output_dims->height;
    const int32_t *out_shift = quant_data->shift;
    const int32_t *out_mult = quant_data->mult;
    const int32_t activation_min = conv_params->activation.min;
    const int32_t activation_max = conv_params->activation.max;
    const uint16_t ch_mult = conv_params->ch_mult;

    int out_idx = 0;
    for (int out_y = 0; out_y < out_ht; out_y++) {
        /* window origin in input rows; negative inside the top padding */
        const int16_t base_y = (out_y * stride_ht) - pad_ht;
        /* clip the window vertically so it never leaves the input */
        const int fy_begin = max(0, -base_y);
        const int fy_end = min(filter_ht, input_ht - base_y);

        for (int out_x = 0; out_x < out_wd; out_x++) {
            const int16_t base_x = (out_x * stride_wd) - pad_wd;
            /* ... and horizontally */
            const int fx_begin = max(0, -base_x);
            const int fx_end = min(filter_wd, input_wd - base_x);

            for (int ch_idx = 0; ch_idx < channels; ch_idx++) {
                for (int m = 0; m < ch_mult; m++) {
                    const int out_ch = ch_idx * ch_mult + m;
                    int32_t acc = 0;

                    for (int fy = fy_begin; fy < fy_end; fy++) {
                        const int32_t in_y = base_y + fy;
                        for (int fx = fx_begin; fx < fx_end; fx++) {
                            const int32_t in_x = base_x + fx;
                            const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch_idx;
                            const int32_t flt_pos = (fy * filter_wd + fx) * (channels * ch_mult) + out_ch;
                            acc += (input_data[in_pos] + input_offset) * filter_data[flt_pos];
                        }
                    }

                    if (bias) {
                        acc += bias[out_ch];
                    }
                    acc = esp_nn_multiply_by_quantized_mult(acc, out_mult[out_ch], out_shift[out_ch]);
                    acc += out_offset;
                    acc = max(acc, activation_min);
                    acc = min(acc, activation_max);
                    out_data[out_idx++] = acc;
                }
            }
        }
    }
}

View File

@@ -1,291 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <esp_nn_defs.h>
#include <common_functions.h>
/**
 * Scratch-size query for the generic optimised depthwise convolution.
 *
 * Always returns 0; none of the arguments are inspected.
 */
int esp_nn_get_depthwise_conv_scratch_size_opt(const data_dims_t *input_dims,
                                               const data_dims_t *filter_dims,
                                               const data_dims_t *output_dims,
                                               const dw_conv_params_t *conv_params)
{
    (void) input_dims;
    (void) filter_dims;
    (void) output_dims;
    (void) conv_params;
    return 0;
}
/**
 * Scratch-buffer setter for the generic optimised depthwise convolution.
 *
 * Deliberately a no-op: the pointer is neither stored nor dereferenced.
 */
void esp_nn_set_depthwise_conv_scratch_buf_opt(const void *buf)
{
    (void) buf;
}
/*
 * Depthwise int8 convolution for the common channel multiplier == 1 case.
 *
 * With a multiplier of one, output channel i depends only on input channel
 * i. Channels are processed four at a time with independent accumulators;
 * a scalar loop handles the remaining channels. For each output pixel the
 * filter window is clipped to the input bounds, so taps that would fall in
 * the padding region are skipped.
 *
 * Indexing used below: input is [row][col][channel], filter is
 * [filter_row][filter_col][channel], output is written sequentially as
 * [row][col][channel].
 */
__attribute__ ((noinline))
static void esp_nn_depthwise_conv_s8_ch_mult_1(const data_dims_t *input_dims,
                                               const int8_t *input_data,
                                               const data_dims_t *filter_dims,
                                               const int8_t *filter_data,
                                               const int32_t *bias,
                                               const data_dims_t *output_dims,
                                               int8_t *out_data,
                                               const dw_conv_params_t *conv_params,
                                               const quant_data_t *quant_data)
{
    const uint16_t input_wd = input_dims->width;
    const uint16_t input_ht = input_dims->height;
    const uint16_t channels = input_dims->channels;
    const int32_t input_offset = conv_params->in_offset;
    const int32_t out_offset = conv_params->out_offset;
    const uint16_t pad_wd = conv_params->padding.width;
    const uint16_t pad_ht = conv_params->padding.height;
    const uint16_t stride_wd = conv_params->stride.width;
    const uint16_t stride_ht = conv_params->stride.height;
    const uint16_t filter_wd = filter_dims->width;
    const uint16_t filter_ht = filter_dims->height;
    const uint16_t out_wd = output_dims->width;
    const uint16_t out_ht = output_dims->height;
    const int32_t activation_min = conv_params->activation.min;
    const int32_t activation_max = conv_params->activation.max;

    int out_idx = 0;
    for (int out_y = 0; out_y < out_ht; out_y++) {
        /* window origin in input rows; negative inside the top padding */
        const int16_t base_y = (out_y * stride_ht) - pad_ht;
        /* clip the window vertically so it never leaves the input */
        const int fy_begin = max(0, -base_y);
        const int fy_end = min(filter_ht, input_ht - base_y);

        for (int out_x = 0; out_x < out_wd; out_x++) {
            const int16_t base_x = (out_x * stride_wd) - pad_wd;
            const int32_t *shift = quant_data->shift;
            const int32_t *mult = quant_data->mult;
            /* ... and horizontally */
            const int fx_begin = max(0, -base_x);
            const int fx_end = min(filter_wd, input_wd - base_x);

            int ch_idx = 0;

            /* main loop: four channels per iteration */
            for (; ch_idx < channels - 3; ch_idx += 4) {
                int32_t acc0 = 0;
                int32_t acc1 = 0;
                int32_t acc2 = 0;
                int32_t acc3 = 0;

                for (int fy = fy_begin; fy < fy_end; fy++) {
                    const int32_t in_y = base_y + fy;
                    for (int fx = fx_begin; fx < fx_end; fx++) {
                        const int32_t in_x = base_x + fx;
                        const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch_idx;
                        const int32_t flt_pos = (fy * filter_wd + fx) * (channels) + ch_idx;
                        const int8_t *in_p = input_data + in_pos;
                        const int8_t *flt_p = filter_data + flt_pos;

                        acc0 += (in_p[0] + input_offset) * flt_p[0];
                        acc1 += (in_p[1] + input_offset) * flt_p[1];
                        acc2 += (in_p[2] + input_offset) * flt_p[2];
                        acc3 += (in_p[3] + input_offset) * flt_p[3];
                    }
                }

                if (bias) {
                    acc0 += bias[ch_idx + 0];
                    acc1 += bias[ch_idx + 1];
                    acc2 += bias[ch_idx + 2];
                    acc3 += bias[ch_idx + 3];
                }

                /* requantize, re-centre and clamp each lane */
                acc0 = esp_nn_multiply_by_quantized_mult_fast(acc0, mult[ch_idx + 0], shift[ch_idx + 0]);
                acc0 += out_offset;
                acc0 = max(acc0, activation_min);
                acc0 = min(acc0, activation_max);

                acc1 = esp_nn_multiply_by_quantized_mult_fast(acc1, mult[ch_idx + 1], shift[ch_idx + 1]);
                acc1 += out_offset;
                acc1 = max(acc1, activation_min);
                acc1 = min(acc1, activation_max);

                acc2 = esp_nn_multiply_by_quantized_mult_fast(acc2, mult[ch_idx + 2], shift[ch_idx + 2]);
                acc2 += out_offset;
                acc2 = max(acc2, activation_min);
                acc2 = min(acc2, activation_max);

                acc3 = esp_nn_multiply_by_quantized_mult_fast(acc3, mult[ch_idx + 3], shift[ch_idx + 3]);
                acc3 += out_offset;
                acc3 = max(acc3, activation_min);
                acc3 = min(acc3, activation_max);

                out_data[out_idx++] = acc0;
                out_data[out_idx++] = acc1;
                out_data[out_idx++] = acc2;
                out_data[out_idx++] = acc3;
            }

            /* tail: channels left over after the four-wide loop */
            for (; ch_idx < channels; ch_idx++) {
                int32_t acc = 0;

                for (int fy = fy_begin; fy < fy_end; fy++) {
                    const int32_t in_y = base_y + fy;
                    for (int fx = fx_begin; fx < fx_end; fx++) {
                        const int32_t in_x = base_x + fx;
                        const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch_idx;
                        const int32_t flt_pos = (fy * filter_wd + fx) * (channels) + ch_idx;
                        acc += (input_data[in_pos] + input_offset) * filter_data[flt_pos];
                    }
                }

                if (bias) {
                    acc += bias[ch_idx];
                }
                acc = esp_nn_multiply_by_quantized_mult_fast(acc, mult[ch_idx], shift[ch_idx]);
                acc += out_offset;
                acc = max(acc, activation_min);
                acc = min(acc, activation_max);
                out_data[out_idx++] = acc;
            }
        }
    }
}
/**
 * Generic optimised int8 depthwise convolution.
 *
 * A channel multiplier of 1 is forwarded to
 * esp_nn_depthwise_conv_s8_ch_mult_1. Otherwise each input channel yields
 * ch_mult output channels (output channel = ch_idx * ch_mult + ch_mult_idx);
 * these are computed four at a time, sharing the same input sample, with a
 * scalar loop for the remainder. For each output pixel the filter window is
 * clipped to the input bounds, so taps in the padding region are skipped.
 *
 * Indexing used below: input is [row][col][channel], filter is
 * [filter_row][filter_col][out_channel], output is written sequentially as
 * [row][col][out_channel].
 */
void esp_nn_depthwise_conv_s8_opt(const data_dims_t *input_dims,
                                  const int8_t *input_data,
                                  const data_dims_t *filter_dims,
                                  const int8_t *filter_data,
                                  const int32_t *bias,
                                  const data_dims_t *output_dims,
                                  int8_t *out_data,
                                  const dw_conv_params_t *conv_params,
                                  const quant_data_t *quant_data)
{
    const uint16_t ch_mult = conv_params->ch_mult;

    /* dedicated kernel for the common multiplier == 1 case */
    if (ch_mult == 1) {
        esp_nn_depthwise_conv_s8_ch_mult_1(input_dims, input_data, filter_dims, filter_data,
                                           bias, output_dims, out_data, conv_params, quant_data);
        return;
    }

    const uint16_t input_wd = input_dims->width;
    const uint16_t input_ht = input_dims->height;
    const uint16_t channels = input_dims->channels;
    const int32_t input_offset = conv_params->in_offset;
    const int32_t out_offset = conv_params->out_offset;
    const uint16_t pad_wd = conv_params->padding.width;
    const uint16_t pad_ht = conv_params->padding.height;
    const uint16_t stride_wd = conv_params->stride.width;
    const uint16_t stride_ht = conv_params->stride.height;
    const uint16_t filter_wd = filter_dims->width;
    const uint16_t filter_ht = filter_dims->height;
    const uint16_t out_wd = output_dims->width;
    const uint16_t out_ht = output_dims->height;
    const int32_t activation_min = conv_params->activation.min;
    const int32_t activation_max = conv_params->activation.max;

    int out_idx = 0;
    for (int out_y = 0; out_y < out_ht; out_y++) {
        /* window origin in input rows; negative inside the top padding */
        const int16_t base_y = (out_y * stride_ht) - pad_ht;
        /* clip the window vertically so it never leaves the input */
        const int fy_begin = max(0, -base_y);
        const int fy_end = min(filter_ht, input_ht - base_y);

        for (int out_x = 0; out_x < out_wd; out_x++) {
            const int16_t base_x = (out_x * stride_wd) - pad_wd;
            const int32_t *shift = quant_data->shift;
            const int32_t *mult = quant_data->mult;
            /* ... and horizontally */
            const int fx_begin = max(0, -base_x);
            const int fx_end = min(filter_wd, input_wd - base_x);

            for (int ch_idx = 0; ch_idx < channels; ch_idx++) {
                int m = 0;

                /* main loop: four output channels per iteration */
                for (; m < ch_mult - 3; m += 4) {
                    const int out_ch = ch_idx * ch_mult + m;
                    int32_t acc0 = 0;
                    int32_t acc1 = 0;
                    int32_t acc2 = 0;
                    int32_t acc3 = 0;

                    for (int fy = fy_begin; fy < fy_end; fy++) {
                        const int32_t in_y = base_y + fy;
                        for (int fx = fx_begin; fx < fx_end; fx++) {
                            const int32_t in_x = base_x + fx;
                            const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch_idx;
                            const int32_t flt_pos = (fy * filter_wd + fx) * (channels * ch_mult) + out_ch;
                            /* one input sample feeds all four lanes */
                            const int32_t sample = input_data[in_pos] + input_offset;
                            const int8_t *flt_p = filter_data + flt_pos;

                            acc0 += sample * flt_p[0];
                            acc1 += sample * flt_p[1];
                            acc2 += sample * flt_p[2];
                            acc3 += sample * flt_p[3];
                        }
                    }

                    if (bias) {
                        acc0 += bias[out_ch + 0];
                        acc1 += bias[out_ch + 1];
                        acc2 += bias[out_ch + 2];
                        acc3 += bias[out_ch + 3];
                    }

                    /* requantize, re-centre and clamp each lane */
                    acc0 = esp_nn_multiply_by_quantized_mult_fast(acc0, mult[out_ch + 0], shift[out_ch + 0]);
                    acc0 += out_offset;
                    acc0 = max(acc0, activation_min);
                    acc0 = min(acc0, activation_max);

                    acc1 = esp_nn_multiply_by_quantized_mult_fast(acc1, mult[out_ch + 1], shift[out_ch + 1]);
                    acc1 += out_offset;
                    acc1 = max(acc1, activation_min);
                    acc1 = min(acc1, activation_max);

                    acc2 = esp_nn_multiply_by_quantized_mult_fast(acc2, mult[out_ch + 2], shift[out_ch + 2]);
                    acc2 += out_offset;
                    acc2 = max(acc2, activation_min);
                    acc2 = min(acc2, activation_max);

                    acc3 = esp_nn_multiply_by_quantized_mult_fast(acc3, mult[out_ch + 3], shift[out_ch + 3]);
                    acc3 += out_offset;
                    acc3 = max(acc3, activation_min);
                    acc3 = min(acc3, activation_max);

                    out_data[out_idx++] = acc0;
                    out_data[out_idx++] = acc1;
                    out_data[out_idx++] = acc2;
                    out_data[out_idx++] = acc3;
                }

                /* tail: multiplier slots left over after the four-wide loop */
                for (; m < ch_mult; m++) {
                    const int out_ch = ch_idx * ch_mult + m;
                    int32_t acc = 0;

                    for (int fy = fy_begin; fy < fy_end; fy++) {
                        const int32_t in_y = base_y + fy;
                        for (int fx = fx_begin; fx < fx_end; fx++) {
                            const int32_t in_x = base_x + fx;
                            const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch_idx;
                            const int32_t flt_pos = (fy * filter_wd + fx) * (channels * ch_mult) + out_ch;
                            acc += (input_data[in_pos] + input_offset) * filter_data[flt_pos];
                        }
                    }

                    if (bias) {
                        acc += bias[out_ch];
                    }
                    acc = esp_nn_multiply_by_quantized_mult_fast(acc, mult[out_ch], shift[out_ch]);
                    acc += out_offset;
                    acc = max(acc, activation_min);
                    acc = min(acc, activation_max);
                    out_data[out_idx++] = acc;
                }
            }
        }
    }
}

View File

@@ -1,543 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <esp_nn_defs.h>
#include <common_functions.h>
/* Scratch memory used by esp_nn_depthwise_conv_s8_esp32s3(). It stays NULL until
 * esp_nn_set_depthwise_conv_scratch_buf_esp32s3() installs a buffer. */
static int16_t *scratch_buffer = NULL;
/*
 * The kernels below are only declared here; their definitions live outside this file
 * (presumably ESP32-S3 optimized implementations -- TODO confirm).
 */
/* int16 input/filter, 3x3 filter; used by the dispatcher when ch_mult % 8 == 0. */
extern void esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t channels,
const uint16_t pad_wd,
const uint16_t pad_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const uint16_t ch_mult,
const int16_t *filter_data,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max);
/* int8 input/filter, 3x3 filter, ch_mult == 1. Takes no padding arguments: the
 * dispatcher hands it an input that has already been padded. */
extern void esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(const int8_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t channels,
const int32_t input_offset,
const uint16_t stride_wd,
const uint16_t stride_ht,
const int8_t *filter_data,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max);
/* int16 input/filter, 3x3 filter, no padding arguments. Not called by the code in this file. */
extern void esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t channels,
const uint16_t stride_wd,
const uint16_t stride_ht,
const int16_t *filter_data,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max);
/* int16 input/filter, arbitrary filter size; used by the dispatcher when
 * ch_mult % 8 == 0 and the filter is not 3x3. */
extern void esp_nn_depthwise_conv_s16_mult8_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t channels,
const uint16_t pad_wd,
const uint16_t pad_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const uint16_t ch_mult,
const int16_t *filter_data,
const uint16_t filter_wd,
const uint16_t filter_ht,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max);
/* int16 input/filter, arbitrary filter size; used by the dispatcher when
 * ch_mult % 4 == 0 (and not a multiple of 8). */
extern void esp_nn_depthwise_conv_s16_mult4_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t channels,
const uint16_t pad_wd,
const uint16_t pad_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const uint16_t ch_mult,
const int16_t *filter_data,
const uint16_t filter_wd,
const uint16_t filter_ht,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max);
/* int16 input/filter, 3x3 filter, ch_mult == 1; the dispatcher's fallback for 3x3
 * filters when the int8 padded kernel is not selected. */
extern void esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t channels,
const uint16_t pad_wd,
const uint16_t pad_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const int16_t *filter_data,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max);
/* int16 input/filter, arbitrary filter size, no ch_mult argument. Not called by the code in this file. */
extern void esp_nn_depthwise_conv_s16_mult1_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
const uint16_t input_ht,
const uint16_t channels,
const uint16_t pad_wd,
const uint16_t pad_ht,
const uint16_t stride_wd,
const uint16_t stride_ht,
const int16_t *filter_data,
const uint16_t filter_wd,
const uint16_t filter_ht,
const int32_t *bias,
int8_t *out_data,
const uint16_t out_wd,
const uint16_t out_ht,
const int32_t out_offset,
const int32_t *out_shift,
const int32_t *out_mult,
const int32_t activation_min,
const int32_t activation_max);
/* Widening helpers the dispatcher uses to fill the int16 scratch areas: `size` is an
 * element count; the second variant also takes an offset (presumably added to every
 * element while widening -- TODO confirm against the implementation). */
extern void esp_nn_s8_to_s16_esp32s3(const int8_t *src, int16_t *dst, const int size);
extern void esp_nn_aligned_s8_to_s16_with_offset_esp32s3(const int8_t *src, int16_t *dst,
const int size, const int32_t offset);
/**
 * Plain-C int8 depthwise convolution for an arbitrary channel multiplier.
 *
 * Input, filter and output are indexed with the channel as the fastest-moving
 * dimension. For each output pixel and input channel, `ch_mult` output channels
 * are produced: four per pass while at least four remain, then one at a time.
 * Filter taps that would fall outside the input are left out of the sum.
 *
 * Per output channel: accumulate (input + input_offset) * filter, add the
 * optional bias, requantize with that channel's out_mult/out_shift entry, add
 * out_offset, clamp to [activation_min, activation_max] and store as int8.
 */
static void esp_nn_depthwise_conv_s8_unrolled(const int8_t *input_data,
                                              const uint16_t input_wd,
                                              const uint16_t input_ht,
                                              const uint16_t channels,
                                              const int32_t input_offset,
                                              const uint16_t pad_wd,
                                              const uint16_t pad_ht,
                                              const uint16_t stride_wd,
                                              const uint16_t stride_ht,
                                              const uint16_t ch_mult,
                                              const int8_t *filter_data,
                                              const uint16_t filter_wd,
                                              const uint16_t filter_ht,
                                              const int32_t *bias,
                                              int8_t *out_data,
                                              const uint16_t out_wd,
                                              const uint16_t out_ht,
                                              const int32_t out_offset,
                                              const int32_t *out_shift,
                                              const int32_t *out_mult,
                                              const int32_t activation_min,
                                              const int32_t activation_max)
{
    /* Distance, in elements, between two neighbouring filter taps */
    const int tap_stride = channels * ch_mult;
    int write_pos = 0;

    for (int oy = 0; oy < out_ht; oy++) {
        const int16_t origin_y = (oy * stride_ht) - pad_ht;
        for (int ox = 0; ox < out_wd; ox++) {
            const int16_t origin_x = (ox * stride_wd) - pad_wd;

            /* Clip the filter window so that no tap reads outside the input */
            const int ky_first = max(0, -origin_y);
            const int kx_first = max(0, -origin_x);
            const int ky_limit = min(filter_ht, input_ht - origin_y);
            const int kx_limit = min(filter_wd, input_wd - origin_x);

            for (int ch = 0; ch < channels; ch++) {
                int m = 0;

                /* Four output channels per pass */
                while (m < ch_mult - 3) {
                    const int oc = m + ch * ch_mult;
                    int32_t acc[4] = {0, 0, 0, 0};

                    for (int ky = ky_first; ky < ky_limit; ky++) {
                        const int32_t in_y = origin_y + ky;
                        for (int kx = kx_first; kx < kx_limit; kx++) {
                            const int32_t in_x = origin_x + kx;
                            const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch;
                            const int32_t tap_pos = (ky * filter_wd + kx) * tap_stride + oc;
                            const int32_t in_val = input_data[in_pos] + input_offset;
                            for (int k = 0; k < 4; k++) {
                                acc[k] += in_val * filter_data[tap_pos + k];
                            }
                        }
                    }

                    for (int k = 0; k < 4; k++) {
                        int32_t val = acc[k];
                        if (bias) {
                            val += bias[oc + k];
                        }
                        val = esp_nn_multiply_by_quantized_mult(val, out_mult[oc + k], out_shift[oc + k]);
                        val += out_offset;
                        val = max(val, activation_min);
                        val = min(val, activation_max);
                        out_data[write_pos++] = val;
                    }
                    m += 4;
                }

                /* Remaining (fewer than four) output channels */
                while (m < ch_mult) {
                    const int oc = m + ch * ch_mult;
                    int32_t val = 0;

                    for (int ky = ky_first; ky < ky_limit; ky++) {
                        const int32_t in_y = origin_y + ky;
                        for (int kx = kx_first; kx < kx_limit; kx++) {
                            const int32_t in_x = origin_x + kx;
                            const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch;
                            const int32_t tap_pos = (ky * filter_wd + kx) * tap_stride + oc;
                            const int32_t in_val = input_data[in_pos] + input_offset;
                            val += in_val * filter_data[tap_pos];
                        }
                    }
                    if (bias) {
                        val += bias[oc];
                    }
                    val = esp_nn_multiply_by_quantized_mult(val, out_mult[oc], out_shift[oc]);
                    val += out_offset;
                    val = max(val, activation_min);
                    val = min(val, activation_max);
                    out_data[write_pos++] = val;
                    m++;
                }
            }
        }
    }
}
/**
 * Plain-C int8 depthwise convolution for a channel multiplier of one, i.e.
 * every input channel yields exactly one output channel.
 *
 * Data is indexed with the channel as the fastest-moving dimension. Filter
 * taps that would fall outside the input are left out of the sum. Each
 * accumulated value gets the optional bias, is requantized with the channel's
 * out_mult/out_shift entry, offset by out_offset, clamped to
 * [activation_min, activation_max] and stored as int8.
 */
void esp_nn_depthwise_conv_s8_ch_mult1(const int8_t *input_data,
                                       const uint16_t input_wd,
                                       const uint16_t input_ht,
                                       const uint16_t channels,
                                       const int32_t input_offset,
                                       const uint16_t pad_wd,
                                       const uint16_t pad_ht,
                                       const uint16_t stride_wd,
                                       const uint16_t stride_ht,
                                       const int8_t *filter_data,
                                       const uint16_t filter_wd,
                                       const uint16_t filter_ht,
                                       const int32_t *bias,
                                       int8_t *out_data,
                                       const uint16_t out_wd,
                                       const uint16_t out_ht,
                                       const int32_t out_offset,
                                       const int32_t *out_shift,
                                       const int32_t *out_mult,
                                       const int32_t activation_min,
                                       const int32_t activation_max)
{
    int write_pos = 0;

    for (int oy = 0; oy < out_ht; oy++) {
        const int16_t origin_y = (oy * stride_ht) - pad_ht;
        for (int ox = 0; ox < out_wd; ox++) {
            const int16_t origin_x = (ox * stride_wd) - pad_wd;

            /* Clip the filter window so that no tap reads outside the input */
            const int ky_first = max(0, -origin_y);
            const int kx_first = max(0, -origin_x);
            const int ky_limit = min(filter_ht, input_ht - origin_y);
            const int kx_limit = min(filter_wd, input_wd - origin_x);

            for (int ch = 0; ch < channels; ch++) {
                int32_t acc = 0;

                for (int ky = ky_first; ky < ky_limit; ky++) {
                    const int32_t in_y = origin_y + ky;
                    for (int kx = kx_first; kx < kx_limit; kx++) {
                        const int32_t in_x = origin_x + kx;
                        const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch;
                        const int32_t tap_pos = (ky * filter_wd + kx) * channels + ch;
                        const int32_t in_val = input_data[in_pos] + input_offset;
                        acc += in_val * filter_data[tap_pos];
                    }
                }
                if (bias) {
                    acc += bias[ch];
                }
                acc = esp_nn_multiply_by_quantized_mult(acc, out_mult[ch], out_shift[ch]);
                acc += out_offset;
                acc = max(acc, activation_min);
                acc = min(acc, activation_max);
                out_data[write_pos++] = acc;
            }
        }
    }
}
/**
 * Return the number of scratch bytes esp_nn_depthwise_conv_s8_esp32s3() needs
 * for the given layer. The branches mirror the kernel dispatch in that function.
 *
 * - int8 3x3 kernel (ch_mult == 1, channels % 16 == 0, padding (0,0) or (1,1)):
 *   a byte copy of the filter, a gap of up to 16 bytes, and, when padding is
 *   required, a padded byte copy of the input.
 * - int16 kernels: filter and input are both widened to int16_t and the input
 *   copy starts `16 - (filter_size & 15)` int16 elements after the filter copy,
 *   i.e. up to 32 bytes of gap. The previous `+ 16` under-reserved this gap.
 * - everything else runs without scratch; a token 32 bytes is requested.
 *
 * With channels % 16 == 0 and a padding that is neither (0,0) nor (1,1), the
 * convolution takes the int16 path, so at least the int16 size is returned.
 *
 * The result is never smaller than what earlier versions of this function
 * returned for the same arguments.
 */
int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
                                                   const data_dims_t *filter_dims,
                                                   const data_dims_t *output_dims,
                                                   const dw_conv_params_t *conv_params)
{
    const uint16_t input_wd = input_dims->width;
    const uint16_t input_ht = input_dims->height;
    const uint16_t channels = input_dims->channels;
    const uint16_t filter_wd = filter_dims->width;
    const uint16_t filter_ht = filter_dims->height;
    const uint16_t ch_mult = conv_params->ch_mult;
    const uint16_t out_wd = output_dims->width;
    const uint16_t out_ht = output_dims->height;
    const uint16_t pad_wd = conv_params->padding.width;
    const uint16_t pad_ht = conv_params->padding.height;
    const uint16_t stride_wd = conv_params->stride.width;
    const uint16_t stride_ht = conv_params->stride.height;

    const int filter_size = filter_wd * filter_ht * channels * ch_mult;
    const int input_size = input_wd * input_ht * channels;

    /* int16 paths: 2 bytes per element, plus the worst-case 16-element gap
     * between the filter copy and the input copy */
    const int s16_size = 2 * (filter_size + input_size + 16);

    if ((ch_mult == 1) && (channels % 8 == 0) && (filter_wd == 3) && (filter_ht == 3)) {
        if (channels % 16 != 0) {
            return s16_size;
        }

        int pad_width = pad_wd * 2;
        int pad_height = pad_ht * 2;
        int uses_s8_kernel = 1;

        if ((pad_wd == 0) && (pad_ht == 0)) {
            /* No explicit padding: the input may still need extra columns/rows
             * at the right/bottom so the last window is complete */
            pad_width = (out_wd * stride_wd + filter_wd - 1) - input_wd;
            pad_height = (out_ht * stride_ht + filter_ht - 1) - input_ht;
        } else if ((pad_wd != 1) || (pad_ht != 1)) {
            /* The int8 kernel is only selected for padding (0,0) or (1,1) */
            uses_s8_kernel = 0;
        }

        int s8_size = filter_size + 16; /* 16 for alignment */
        if (pad_width || pad_height) {
            s8_size += (input_wd + pad_width) * (input_ht + pad_height) * channels;
        }

        if (!uses_s8_kernel && (s8_size < s16_size)) {
            return s16_size;
        }
        return s8_size;
    } else if (ch_mult % 4 == 0) {
        return s16_size;
    }
    return 32; /* just a few bytes */
}
/**
 * Install the scratch buffer used by esp_nn_depthwise_conv_s8_esp32s3().
 * The pointer is stored as-is; passing NULL unsets the buffer.
 */
void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(void *buf)
{
    int16_t *scratch_as_s16 = buf;

    scratch_buffer = scratch_as_s16;
}
/**
 * int8 depthwise convolution entry point; picks a kernel from the layer shape.
 *
 * Dispatch:
 *  - ch_mult == 1, channels % 8 == 0, 3x3 filter:
 *      - channels % 16 == 0 and padding (1,1): input is padded into scratch
 *        and the int8 "padded" kernel is used;
 *      - channels % 16 == 0 and padding (0,0): input is padded at the
 *        right/bottom only if needed, then the same int8 kernel is used;
 *      - otherwise filter and input are widened to int16 in scratch and the
 *        int16 3x3 kernel is used.
 *  - ch_mult == 1, channels % 8 == 0, other filter sizes: plain-C kernel.
 *  - ch_mult % 8 == 0 / ch_mult % 4 == 0: widened to int16, mult8 / mult4 kernels.
 *  - anything else: plain-C unrolled kernel.
 *
 * The scratch buffer must have been installed with
 * esp_nn_set_depthwise_conv_scratch_buf_esp32s3(); if it is NULL an error is
 * printed and the function returns without writing any output.
 *
 * Assumption 1: i/p channels == o/p channels
 * Assumption 2: Pointers are valid
 * Assumption 3: dilation width = 1
 */
void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims,
                                      const int8_t *input_data,
                                      const data_dims_t *filter_dims,
                                      const int8_t *filter_data,
                                      const int32_t *bias,
                                      const data_dims_t *output_dims,
                                      int8_t *out_data,
                                      const dw_conv_params_t *conv_params,
                                      const quant_data_t *quant_data)
{
    /* Check before deriving any pointer from scratch_buffer: offsetting a
     * NULL pointer is undefined behaviour even if the result is never used. */
    if (scratch_buffer == NULL) {
        printf("esp_nn_depthwise_conv error! scratch_buffer not set!\n");
        return;
    }

    const uint16_t input_wd = input_dims->width;
    const uint16_t input_ht = input_dims->height;
    const uint16_t channels = input_dims->channels;
    const int32_t input_offset = conv_params->in_offset;
    const int32_t out_offset = conv_params->out_offset;
    const uint16_t pad_wd = conv_params->padding.width;
    const uint16_t pad_ht = conv_params->padding.height;
    const uint16_t stride_wd = conv_params->stride.width;
    const uint16_t stride_ht = conv_params->stride.height;
    const uint16_t filter_wd = filter_dims->width;
    const uint16_t filter_ht = filter_dims->height;
    const uint16_t out_wd = output_dims->width;
    const uint16_t out_ht = output_dims->height;
    const int32_t *out_shift = quant_data->shift;
    const int32_t *out_mult = quant_data->mult;
    const int32_t activation_min = conv_params->activation.min;
    const int32_t activation_max = conv_params->activation.max;
    const uint16_t ch_mult = conv_params->ch_mult;

    int filter_size = filter_wd * filter_ht * channels * ch_mult;
    /* Gap (1..16 elements) placed after the filter copy so that the input
     * copy starts at a multiple of 16 elements from the scratch base */
    int align_len = 16 - (filter_size & 15);
    int input_size = input_wd * input_ht * channels;

    /* Layout used by the int16 kernels: [filter][gap][input], all int16_t */
    int16_t *filter_data16 = scratch_buffer;
    int16_t *input_data16 = scratch_buffer + filter_size + align_len;

    if ((ch_mult == 1) && (channels % 8 == 0)) {
        if ((filter_wd == 3) && (filter_ht == 3)) {
            if ((channels % 16 == 0) && (pad_wd == 1) && (pad_ht == 1)) {
                /* process in 8 bits; same layout as above but in bytes */
                int8_t *filter_aligned = (int8_t *) scratch_buffer;
                int8_t *input_padded = (int8_t *) scratch_buffer + filter_size + align_len;
                memcpy(filter_aligned, filter_data, filter_size);
                /* pad with -input_offset so padded samples contribute zero
                 * once the kernel adds input_offset back */
                esp_nn_aligned_s8_pad_with_value(input_data, input_padded, input_wd, input_ht, channels,
                                                 -input_offset, pad_wd, pad_ht);
                esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(input_padded, input_wd + 2 * pad_wd,
                                                                  input_ht + 2 * pad_ht, channels, input_offset,
                                                                  stride_wd, stride_ht, filter_aligned, bias,
                                                                  out_data, out_wd, out_ht, out_offset, out_shift,
                                                                  out_mult, activation_min, activation_max);
            } else if ((channels % 16 == 0) && (pad_wd == 0) && (pad_ht == 0)) {
                /* process in 8 bits */
                int8_t *filter_aligned = (int8_t *) scratch_buffer;
                int8_t *input_padded = (int8_t *) scratch_buffer + filter_size + align_len;

                /* check if we need to pad additionally */
                int pad_right = (out_wd * stride_wd + filter_wd - 1) - input_wd;
                int pad_bottom = (out_ht * stride_ht + filter_ht - 1) - input_ht;
                if (pad_right || pad_bottom) { /* pad right and bottom */
                    esp_nn_aligned_s8_pad_end_with_value(input_data, input_padded, input_wd, input_ht,
                                                         channels, -input_offset, pad_right, pad_bottom);
                } else {
                    /* nothing to pad: feed the caller's input directly */
                    input_padded = (int8_t *) input_data;
                }
                memcpy(filter_aligned, filter_data, filter_size);
                esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(input_padded, input_wd + pad_right,
                                                                  input_ht + pad_bottom, channels, input_offset,
                                                                  stride_wd, stride_ht, filter_aligned, bias,
                                                                  out_data, out_wd, out_ht, out_offset, out_shift,
                                                                  out_mult, activation_min, activation_max);
            } else { /* (channels % 8) == 0 */
                esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
                esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset);
                esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3(input_data16, input_wd, input_ht, channels,
                                                            pad_wd, pad_ht, stride_wd, stride_ht, filter_data16,
                                                            bias, out_data, out_wd, out_ht, out_offset, out_shift,
                                                            out_mult, activation_min, activation_max);
            }
        } else { /* all other ch_mult == 1, `channels % 8 == 0` */
            esp_nn_depthwise_conv_s8_ch_mult1(input_data, input_wd, input_ht, channels, input_offset,
                                              pad_wd, pad_ht, stride_wd, stride_ht,
                                              filter_data, filter_wd, filter_ht,
                                              bias, out_data, out_wd, out_ht, out_offset, out_shift,
                                              out_mult, activation_min, activation_max);
        }
    } else if (ch_mult % 8 == 0) {
        esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
        esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset);
        if (filter_wd == 3 && filter_ht == 3) {
            esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3(input_data16, input_wd, input_ht, channels,
                                                        pad_wd, pad_ht, stride_wd, stride_ht, ch_mult,
                                                        filter_data16, bias,
                                                        out_data, out_wd, out_ht, out_offset, out_shift,
                                                        out_mult, activation_min, activation_max);
        } else {
            esp_nn_depthwise_conv_s16_mult8_esp32s3(input_data16, input_wd, input_ht, channels,
                                                    pad_wd, pad_ht, stride_wd, stride_ht, ch_mult,
                                                    filter_data16, filter_wd, filter_ht, bias,
                                                    out_data, out_wd, out_ht, out_offset, out_shift,
                                                    out_mult, activation_min, activation_max);
        }
    } else if (ch_mult % 4 == 0) {
        esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
        esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset);
        esp_nn_depthwise_conv_s16_mult4_esp32s3(input_data16, input_wd, input_ht, channels,
                                                pad_wd, pad_ht, stride_wd, stride_ht, ch_mult,
                                                filter_data16, filter_wd, filter_ht, bias,
                                                out_data, out_wd, out_ht, out_offset, out_shift,
                                                out_mult, activation_min, activation_max);
    } else {
        esp_nn_depthwise_conv_s8_unrolled(input_data, input_wd, input_ht, channels, input_offset,
                                          pad_wd, pad_ht, stride_wd, stride_ht, ch_mult,
                                          filter_data, filter_wd, filter_ht,
                                          bias, out_data, out_wd, out_ht, out_offset, out_shift,
                                          out_mult, activation_min, activation_max);
    }
}

View File

@@ -1,50 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <common_functions.h>
/**
 * Plain-C int8 fully connected layer.
 *
 * `filter_data` holds `out_channels` consecutive rows of `row_len` weights.
 * For each output channel the dot product of (weight + filter_offset) and
 * (input + input_offset) is accumulated, the optional bias is added, the sum
 * is requantized with the single out_mult/out_shift pair, offset by
 * out_offset, clamped to [activation_min, activation_max] and stored as int8.
 */
void esp_nn_fully_connected_s8_ansi(const int8_t *input_data,
                                    const int32_t input_offset,
                                    const uint16_t row_len,
                                    const int8_t *filter_data,
                                    const int32_t filter_offset,
                                    const int32_t *bias,
                                    int8_t *out_data,
                                    const uint16_t out_channels,
                                    const int32_t out_offset,
                                    const int32_t out_shift,
                                    const int32_t out_mult,
                                    const int32_t activation_min,
                                    const int32_t activation_max)
{
    for (int32_t out_c = 0; out_c < out_channels; ++out_c) {
        /* Weights feeding this output channel */
        const int8_t *weights_row = filter_data + row_len * out_c;
        int32_t acc = 0;

        for (int32_t i = 0; i < row_len; i++) {
            const int32_t weight = weights_row[i];
            const int32_t sample = input_data[i];
            acc += (weight + filter_offset) * (sample + input_offset);
        }
        if (bias) {
            acc += bias[out_c];
        }

        acc = esp_nn_multiply_by_quantized_mult(acc, out_mult, out_shift);
        acc += out_offset;
        acc = max(acc, activation_min);
        acc = min(acc, activation_max);
        out_data[out_c] = (int8_t) acc;
    }
}

View File

@@ -1,72 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <common_functions.h>
/**
 * Plain-C int8 average pooling.
 *
 * Input and output are indexed with the channel as the fastest-moving
 * dimension. For each output pixel and channel, the input samples under the
 * filter window (clipped to the input bounds) are summed and divided by the
 * number of samples actually visited, rounding half away from zero. The
 * average is then clamped to [activation_min, activation_max].
 */
void esp_nn_avg_pool_s8_ansi(const int8_t *input,
                             const uint16_t input_wd,
                             const uint16_t input_ht,
                             int8_t *output,
                             const uint16_t output_wd,
                             const uint16_t output_ht,
                             const uint16_t stride_wd,
                             const uint16_t stride_ht,
                             const uint16_t filter_wd,
                             const uint16_t filter_ht,
                             const uint16_t pad_wd,
                             const uint16_t pad_ht,
                             const int32_t activation_min,
                             const int32_t activation_max,
                             const uint16_t channels)
{
    for (int32_t out_y = 0; out_y < output_ht; out_y++) {
        const int32_t origin_y = out_y * stride_ht - pad_ht;
        for (int32_t out_x = 0; out_x < output_wd; out_x++) {
            const int32_t origin_x = out_x * stride_wd - pad_wd;

            /* Keep the filter window inside the input box */
            const int32_t ky_first = max(0, -origin_y);
            const int32_t kx_first = max(0, -origin_x);
            const int32_t ky_limit = min(filter_ht, input_ht - origin_y);
            const int32_t kx_limit = min(filter_wd, input_wd - origin_x);

            for (int32_t ch = 0; ch < channels; ch++) {
                int32_t sum = 0;
                int32_t taps = 0;

                for (int32_t ky = ky_first; ky < ky_limit; ky++) {
                    const int32_t in_y = origin_y + ky;
                    for (int32_t kx = kx_first; kx < kx_limit; kx++) {
                        const int32_t in_x = origin_x + kx;
                        sum += input[(in_y * input_wd + in_x) * channels + ch];
                        taps++;
                    }
                }

                /* Rounded average.
                 * NOTE(review): if the clipped window is empty, taps is 0 and
                 * this divides by zero; callers must not pass such geometry. */
                int32_t avg;
                if (sum > 0) {
                    avg = (sum + taps / 2) / taps;
                } else {
                    avg = (sum - taps / 2) / taps;
                }

                /* Activation function */
                avg = max(avg, activation_min);
                avg = min(avg, activation_max);

                output[(out_y * output_wd + out_x) * channels + ch] = (int8_t) avg;
            }
        }
    }
}

View File

@@ -1,66 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <common_functions.h>
/**
 * Plain-C int8 max pooling.
 *
 * Input and output are indexed with the channel as the fastest-moving
 * dimension. For each output pixel and channel, the largest input sample
 * under the filter window (clipped to the input bounds) is taken, starting
 * from INT8_MIN, and then clamped to [activation_min, activation_max].
 */
void esp_nn_max_pool_s8_ansi(const int8_t *input,
                             const uint16_t input_wd,
                             const uint16_t input_ht,
                             int8_t *output,
                             const uint16_t output_wd,
                             const uint16_t output_ht,
                             const uint16_t stride_wd,
                             const uint16_t stride_ht,
                             const uint16_t filter_wd,
                             const uint16_t filter_ht,
                             const uint16_t pad_wd,
                             const uint16_t pad_ht,
                             const int32_t activation_min,
                             const int32_t activation_max,
                             const uint16_t channels)
{
    for (int32_t out_y = 0; out_y < output_ht; out_y++) {
        const int32_t origin_y = out_y * stride_ht - pad_ht;
        /* Rows of the window that lie inside the input */
        const int32_t ky_first = max(0, -origin_y);
        const int32_t ky_limit = min(filter_ht, input_ht - origin_y);

        for (int32_t out_x = 0; out_x < output_wd; out_x++) {
            const int32_t origin_x = out_x * stride_wd - pad_wd;
            /* Columns of the window that lie inside the input */
            const int32_t kx_first = max(0, -origin_x);
            const int32_t kx_limit = min(filter_wd, input_wd - origin_x);

            for (int32_t ch = 0; ch < channels; ch++) {
                /* Kept as int8_t: the clamp results below are narrowed back
                 * to int8_t on assignment. */
                int8_t best = INT8_MIN;

                for (int32_t ky = ky_first; ky < ky_limit; ky++) {
                    const int32_t in_y = origin_y + ky;
                    for (int32_t kx = kx_first; kx < kx_limit; kx++) {
                        const int32_t in_x = origin_x + kx;
                        const int32_t in_pos = (in_y * input_wd + in_x) * channels + ch;
                        best = max(input[in_pos], best);
                    }
                }

                /* Activation function */
                best = max(best, activation_min);
                best = min(best, activation_max);

                output[(out_y * output_wd + out_x) * channels + ch] = best;
            }
        }
    }
}

View File

@@ -1,88 +0,0 @@
// Copyright 2022 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "softmax_common.h"
/**
 * Scratch size query for the ANSI softmax: always 0, whatever the
 * dimensions passed in.
 */
int32_t esp_nn_get_softmax_scratch_size_ansi(const int32_t width, const int32_t height)
{
    /* Both arguments are intentionally unused */
    (void) height;
    (void) width;

    return 0;
}
/**
 * Scratch buffer setter for the ANSI softmax. The pointer is ignored:
 * nothing is stored and the buffer is not touched.
 */
void esp_nn_set_softmax_scratch_buf_ansi(void *buffer)
{
    /* Intentionally unused */
    (void) buffer;
}
/**
 * Plain-C int8 softmax over `height` rows of `width` values each.
 *
 * Per row:
 *   1. find the row maximum;
 *   2. for every element whose difference to the maximum is at least
 *      `diff_min`, rescale the difference with `mult`/`shift`, evaluate the
 *      fixed-point exponential and accumulate it (scaled down by ACCUM_BITS);
 *   3. derive the reciprocal of the accumulated sum;
 *   4. recompute each exponential, scale it by the reciprocal, shift the
 *      result down by 128 and saturate to int8. Elements below `diff_min`
 *      are written as -128.
 */
void esp_nn_softmax_s8_ansi(const int8_t *input_data,
                            const int32_t height,
                            const int32_t width,
                            const int32_t mult,
                            const int32_t shift,
                            const int32_t diff_min,
                            int8_t *output_data)
{
    // The representation chosen for the input to the exp() function is Q5.26.
    // We need to leave extra space since values that we skip might be as large as
    // -32 before multiplying by input mult, and therefore as large as
    // -16 afterwards. Note that exp(-8) is definitely not insignificant to
    // accumulation, but exp(-16) definitely is.
#define ACCUM_BITS 12
#define DIFF_BITS 5

    const int32_t mask = (1 << shift);
    const int8_t *row_in = input_data;
    int8_t *row_out = output_data;

    for (int row_idx = 0; row_idx < height; row_idx++, row_in += width, row_out += width) {
        /* Pass 1: row maximum */
        int8_t row_max = row_in[0];
        for (int32_t c = 1; c < width; c++) {
            row_max = max(row_max, row_in[c]);
        }

        /* Pass 2: sum of exponentials of the significant differences */
        int32_t exp_sum = 0;
        for (int32_t c = 0; c < width; c++) {
            const int32_t diff = row_in[c] - row_max;
            if (diff < diff_min) {
                continue;
            }
            const int32_t diff_rescaled = SAT_HIGH_MUL(diff * mask, mult);
            const int32_t exp_raw = esp_nn_exp_on_negative_values(diff_rescaled);
            exp_sum += DIV_POW2(exp_raw, ACCUM_BITS);
        }

        /* Normalise the sum and take its reciprocal */
        const int32_t headroom_plus1 = esp_nn_clz32((uint32_t) exp_sum);
        const int32_t shifted_scale = ONE_OVER_ONE_X((exp_sum << headroom_plus1) - (1 << 31));
        const int32_t bits_over_unit = ACCUM_BITS - headroom_plus1 + 31 - sizeof(int8_t) * 8;

        /* Pass 3: scale every exponential and write the int8 result */
        for (int32_t c = 0; c < width; c++) {
            const int32_t diff = row_in[c] - row_max;
            if (diff < diff_min) {
                row_out[c] = -128;
                continue;
            }
            const int32_t diff_rescaled = SAT_HIGH_MUL(diff * mask, mult);
            const int32_t exp_raw = esp_nn_exp_on_negative_values(diff_rescaled);
            const int32_t shifted_output = SAT_HIGH_MUL(shifted_scale, exp_raw);
            const int32_t result = DIV_POW2(shifted_output, bits_over_unit) - 128;
            row_out[c] = (int8_t) esp_nn_saturate8(result);
        }
    }
}

View File

@@ -1,108 +0,0 @@
// Copyright 2022 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "softmax_common.h"
#include <stdio.h>
/* Scratch memory for esp_nn_softmax_s8_opt(), which stores one int32_t per column in it.
 * NULL until esp_nn_set_softmax_scratch_buf_opt() installs a buffer. */
static int32_t *scratch_buf = NULL;
/**
 * @brief Scratch buffer size required by the optimised softmax
 *
 * One 4-byte slot is needed per column; the row count does not matter.
 *
 * @param width     number of columns in a row
 * @param height    number of rows (unused)
 * @return          size in bytes (`width * 4`)
 *
 * @note buffer must be 4 byte aligned
 */
int32_t esp_nn_get_softmax_scratch_size_opt(const int32_t width, const int32_t height)
{
    const int32_t bytes_per_column = 4;

    (void) height;
    return width * bytes_per_column;
}
/**
 * @brief Install the scratch buffer used by the optimised softmax
 *
 * The pointer is stored as-is; no copy is made.
 *
 * @param buffer    scratch memory, aligned to 4 bytes;
 *                  pass NULL to unset the buffer
 */
void esp_nn_set_softmax_scratch_buf_opt(void *buffer)
{
    int32_t *scratch_as_s32 = buffer;

    scratch_buf = scratch_as_s32;
}
/**
 * int8 softmax over `height` rows of `width` values each. Works like the
 * ANSI variant but caches each exponential in the scratch buffer so it is
 * evaluated only once per element.
 *
 * Requires a scratch buffer installed with esp_nn_set_softmax_scratch_buf_opt();
 * if none is set, an error is printed and nothing is written.
 *
 * Per row:
 *   1. find the row maximum;
 *   2. for every element whose difference to the maximum is at least
 *      `diff_min`, evaluate the fixed-point exponential, store it in scratch
 *      and accumulate it (scaled down by ACCUM_BITS);
 *   3. derive the reciprocal of the accumulated sum;
 *   4. scale each stored exponential by the reciprocal, shift down by 128 and
 *      saturate to int8. Elements below `diff_min` are written as -128.
 */
void esp_nn_softmax_s8_opt(const int8_t *input_data,
                           const int32_t height,
                           const int32_t width,
                           const int32_t mult,
                           const int32_t shift,
                           const int32_t diff_min,
                           int8_t *output_data)
{
    if (scratch_buf == NULL) {
        printf("%s error! scratch buffer not set\n", __FUNCTION__);
        return;
    }
    // The representation chosen for the input to the exp() function is Q5.26.
    // We need to leave extra space since values that we skip might be as large as
    // -32 before multiplying by input mult, and therefore as large as
    // -16 afterwards. Note that exp(-8) is definitely not insignificant to
    // accumulation, but exp(-16) definitely is.
#define ACCUM_BITS 12
#define DIFF_BITS 5

    const int32_t mask = (1 << shift);
    const int8_t *row_in = input_data;
    int8_t *row_out = output_data;

    for (int row_idx = 0; row_idx < height; row_idx++, row_in += width, row_out += width) {
        /* Pass 1: row maximum */
        int8_t row_max = row_in[0];
        for (int32_t c = 1; c < width; c++) {
            row_max = max(row_max, row_in[c]);
        }

        /* Pass 2: exponentials of the significant differences, cached per column */
        int32_t exp_sum = 0;
        for (int32_t c = 0; c < width; c++) {
            const int32_t diff = row_in[c] - row_max;
            if (diff < diff_min) {
                continue;
            }
            const int32_t diff_rescaled = SAT_HIGH_MUL(diff * mask, mult);
            const int32_t exp_raw = esp_nn_exp_on_negative_values(diff_rescaled);
            scratch_buf[c] = exp_raw; // store to avoid duplicate calculation later
            exp_sum += DIV_POW2(exp_raw, ACCUM_BITS);
        }

        /* Normalise the sum and take its reciprocal */
        const int32_t headroom_plus1 = esp_nn_clz32((uint32_t) exp_sum);
        const int32_t shifted_scale = ONE_OVER_ONE_X((exp_sum << headroom_plus1) - (1 << 31));
        const int32_t bits_over_unit = ACCUM_BITS - headroom_plus1 + 31 - sizeof(int8_t) * 8;

        /* Pass 3: scale the cached exponentials and write the int8 result */
        for (int32_t c = 0; c < width; c++) {
            const int32_t diff = row_in[c] - row_max;
            if (diff < diff_min) {
                row_out[c] = -128;
                continue;
            }
            const int32_t cached_exp = scratch_buf[c];
            const int32_t shifted_output = SAT_HIGH_MUL(shifted_scale, cached_exp);
            const int32_t result = DIV_POW2(shifted_output, bits_over_unit) - 128;
            row_out[c] = (int8_t) esp_nn_saturate8(result);
        }
    }
}

View File

@@ -1,104 +0,0 @@
// Copyright 2022 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <common_functions.h>
/*
 * Mask helpers for branch-free selection.
 *
 * MASK_IF_ZERO / MASK_IF_NON_ZERO evaluate to ~0 (all bits set) or 0.
 * SELECT_USING_MASK picks `a` where mask bits are set and `b` where they are
 * clear. Every expansion is wrapped in parentheses so the macros can be used
 * inside larger expressions without changing how those expressions parse.
 */
#define MASK_IF_ZERO(x) (((x) == 0) ? ~0 : 0)
#define MASK_IF_NON_ZERO(x) (((x) != 0) ? ~0 : 0)
#define SELECT_USING_MASK(mask, a, b) ((((mask)) & (a)) ^ (~(mask) & (b)))
/* Short aliases for the fixed-point primitives used by the softmax code */
#define SAT_HIGH_MUL(x, y) esp_nn_sat_round_doubling_high_mul((x), (y))
#define DIV_POW2(x,y) esp_nn_div_by_power_of_two((x), (y))
/**
 * Multiply `val` by 2^exp with saturation: values above the largest input
 * that still fits yield INT32_MAX, values below its negation yield INT32_MIN,
 * everything else is shifted left by `exp`.
 */
__NN_FORCE_INLINE__ int32_t mul_power_of_2(int val, int exp)
{
    /* Largest magnitude that can be shifted left by `exp` without overflow */
    const int32_t limit = ((1 << (31 - exp)) - 1);

    if (val > limit) {
        return INT32_MAX;
    }
    if (val < -limit) {
        return INT32_MIN;
    }
    return val << exp;
}
/**
 * @brief Fixed-point `1 / (1 + x)` for x in [0, 1]
 *
 * @param val   input value `x` in fixed point
 * @return      `int32_t` fixed-point approximation of `1 / (1 + x)`
 *
 * @note Newton-Raphson division: an initial linear estimate built from the
 * 48/17 and -32/17 constants is refined with three identical iterations.
 *
 * https://en.wikipedia.org/wiki/Division_algorithm#Newton.E2.80.93Raphson_division
 * explains where the 48/17 and 32/17 constants come from.
 * Pseudocode: https://en.wikipedia.org/wiki/Division_algorithm#Pseudocode
 */
__NN_FORCE_INLINE__ int32_t esp_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val)
{
    /* (val + INT32_MAX) / 2, rounded away from zero, computed in 64 bits */
    const int64_t sum = (int64_t) val + INT32_MAX;
    const int64_t rounding = (sum >= 0) ? 1 : -1;
    const int32_t half_denominator = (int32_t) ((sum + rounding) / 2L);

    const int32_t constant_48_over_17 = 1515870810;
    const int32_t constant_neg_32_over_17 = -1010580540;
    const int32_t fixed_2_one = (1 << 29);

    /* Initial estimate: 48/17 - 32/17 * half_denominator */
    int32_t x = constant_48_over_17 + SAT_HIGH_MUL(half_denominator, constant_neg_32_over_17);

    /* Three Newton-Raphson refinement steps */
    for (int step = 0; step < 3; step++) {
        const int32_t half_denominator_times_x = SAT_HIGH_MUL(half_denominator, x);
        const int32_t correction = SAT_HIGH_MUL(x, fixed_2_one - half_denominator_times_x);
        x += mul_power_of_2(correction, 2);
    }

    return mul_power_of_2(x, 1);
}
#define ONE_OVER_ONE_X(x) esp_nn_one_over_one_plus_x_for_x_in_0_1((x))
/**
* @brief Return exp(x) for x < 0.
*
* The input is split into a part in [-2^24, 0) (low 24 bits minus 2^24) and a
* non-negative `remainder` that is a multiple of 2^24. A polynomial is
* evaluated on the first part, and the result is then multiplied by one
* constant per set bit of `remainder` (bits 24..30).
*
* NOTE(review): the name `one_quarter` for `1 << 24` suggests the input is a
* fixed-point value with 26 fractional bits - confirm against the caller.
*
* @param val input value; `val == 0` returns INT32_MAX
* @return `int32_t` result
*/
__NN_FORCE_INLINE__ int32_t esp_nn_exp_on_negative_values(int32_t val)
{
int32_t shift = 24;
const int32_t one_quarter = (1 << shift);
int32_t mask = one_quarter - 1;
// low 24 bits of val, shifted down by one_quarter: always in [-one_quarter, 0)
const int32_t val_mod_minus_quarter = (val & mask) - one_quarter;
// what is left of val once val_mod_minus_quarter is taken out; handled bit by bit below
const int32_t remainder = val_mod_minus_quarter - val;
// calculate exponent for x in [-1/4, 0) in `result`
// x is val_mod_minus_quarter rescaled by 2^5 and offset by 1 << 28
const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28);
const int32_t x2 = SAT_HIGH_MUL(x, x);
const int32_t x3 = SAT_HIGH_MUL(x2, x);
const int32_t x4 = SAT_HIGH_MUL(x2, x2);
// 715827883 / 2^31 ~= 1/3
const int32_t one_over_3 = 715827883;
// NOTE(review): 1895147668 / 2^31 ~= 0.8825 ~= exp(-1/8), not 1/8, despite the name
const int32_t one_over_8 = 1895147668;
const int32_t x4_over_4 = DIV_POW2(x4, 2);
// ((x4/4 + x3) / 3 + x2) / 2  ==  x4/24 + x3/6 + x2/2
const int32_t x4_over_4_plus_x3_over_6_plus_x2_over_2 = DIV_POW2(SAT_HIGH_MUL(x4_over_4 + x3, one_over_3) + x2, 1);
int32_t result = one_over_8 + SAT_HIGH_MUL(one_over_8, x + x4_over_4_plus_x3_over_6_plus_x2_over_2);
// Each invocation tests the next bit of `remainder` (shift is post-incremented,
// so bits 24, 25, ... 30 in turn) and, if set, multiplies `result` by the constant.
#define SELECT_IF_NON_ZERO(x) { \
mask = MASK_IF_NON_ZERO(remainder & (1 << shift++)); \
result = SELECT_USING_MASK(mask, SAT_HIGH_MUL(result, x), result); \
}
// The constants divided by 2^31 are approximately exp(-1/4), exp(-1/2), exp(-1),
// exp(-2), exp(-4), exp(-8) and exp(-16), in that order.
SELECT_IF_NON_ZERO(1672461947)
SELECT_IF_NON_ZERO(1302514674)
SELECT_IF_NON_ZERO(790015084)
SELECT_IF_NON_ZERO(290630308)
SELECT_IF_NON_ZERO(39332535)
SELECT_IF_NON_ZERO(720401)
SELECT_IF_NON_ZERO(242)
#undef SELECT_IF_NON_ZERO
// an input of exactly 0 returns INT32_MAX instead of the computed result
mask = MASK_IF_ZERO(val);
return SELECT_USING_MASK(mask, INT32_MAX, result);
}

View File

@@ -1,9 +0,0 @@
# The following lines of boilerplate have to be in your project's
# CMakeLists in this exact order for cmake to work correctly
cmake_minimum_required(VERSION 3.5)
# Extra component search paths: the parent directory and its tests/ directory
# (paths are relative to this project directory).
set(EXTRA_COMPONENT_DIRS "../" "../tests/")
# NOTE(review): presumably keeps the `test` and `test_app` components out of the
# build - confirm that the ESP-IDF version in use honours this variable name.
set(IDF_EXCLUDE_COMPONENTS test test_app)
# Pulls in the ESP-IDF project build logic; IDF_PATH must be set in the environment.
include($ENV{IDF_PATH}/tools/cmake/project.cmake)
project(test_app)

View File

@@ -1,7 +0,0 @@
# Component definition for the test application entry point.
# The COMPONENT_* variables below are read by register_component().
set(COMPONENT_SRCS "main.c")
# No public include directories are exported by this component.
set(COMPONENT_ADD_INCLUDEDIRS "")
# Private dependency on the `tests` component.
set(COMPONENT_PRIV_REQUIRES tests)
register_component()

View File

@@ -1,8 +0,0 @@
#
# Main component makefile.
#
# This Makefile can be left empty. By default, it will take the sources in the
# src/ directory, compile them and link them into lib(subdirectory_name).a
# in the build directory. This behaviour is entirely configurable,
# please read the ESP-IDF documents if you need to do this.
#

View File

@@ -1,87 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <esp_log.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <test_functions.h>
#include <esp_timer.h>
static const char *TAG = "test_app";
static uint32_t start_c, start_opt, total_c, total_opt;
/**
 * @brief Record the current esp_cpu_get_ccount() value in `start_c`.
 *
 * Paired with profile_c_end(), which turns it into an elapsed count.
 */
void profile_c_start()
{
/* initiate profiling */
start_c = esp_cpu_get_ccount();
}
/**
 * @brief Store in `total_c` the esp_cpu_get_ccount() difference since
 *        profile_c_start() was called.
 */
void profile_c_end()
{
/* record profile number */
total_c = esp_cpu_get_ccount() - start_c;
}
/**
 * @brief Record the current esp_cpu_get_ccount() value in `start_opt`.
 *
 * Paired with profile_opt_end(), which turns it into an elapsed count.
 */
void profile_opt_start()
{
/* initiate profiling */
start_opt = esp_cpu_get_ccount();
}
/**
 * @brief Store in `total_opt` the esp_cpu_get_ccount() difference since
 *        profile_opt_start() was called.
 */
void profile_opt_end()
{
/* record profile number */
total_opt = esp_cpu_get_ccount() - start_opt;
}
/**
 * @brief Print the counts last stored in `total_c` / `total_opt` for one operator.
 *
 * @param op_name label printed in front of the two counts
 */
static void print_profile_result(const char *op_name)
{
    /* uint32_t is not `unsigned int` on every toolchain; cast so the
     * arguments always match the %u conversions. */
    printf("%s, c %u opt %u\n", op_name, (unsigned int) total_c, (unsigned int) total_opt);
}

/**
 * @brief Application entry point: runs every s8 operator test in turn and
 *        prints the counts recorded by the profile hooks after each one.
 */
void app_main()
{
    /* s8 tests */
    ESP_LOGI(TAG, "Running s8 tests...");
    esp_nn_add_elementwise_s8_test();
    print_profile_result("add");
    esp_nn_mul_elementwise_s8_test();
    print_profile_result("mul");
    esp_nn_depthwise_conv_s8_test();
    print_profile_result("depthwise");
    esp_nn_conv_s8_test();
    print_profile_result("conv2d");
    esp_nn_relu6_s8_test();
    print_profile_result("relu");
    esp_nn_avg_pool_s8_test();
    print_profile_result("avg_pool");
    esp_nn_max_pool_s8_test();
    print_profile_result("max_pool");
    esp_nn_fully_connected_s8_test();
    print_profile_result("fully_connected");
    esp_nn_softmax_s8_test();
    print_profile_result("softmax");
    ESP_LOGI(TAG, "s8 tests done!\n");

    /* u8 tests (currently disabled) */
    //ESP_LOGI(TAG, "Running u8 tests...");
    //esp_nn_add_elementwise_u8_test();
    //esp_nn_depthwise_conv_u8_test();
    //esp_nn_conv_u8_test();
    //esp_nn_avg_pool_u8_test();
    //esp_nn_max_pool_u8_test();
    //esp_nn_fully_connected_u8_test();
    //ESP_LOGI(TAG, "u8 tests done!\n");
}

View File

@@ -1,5 +0,0 @@
#
# esp-nn
#
CONFIG_NN_ESP32=y

View File

@@ -1,8 +0,0 @@
# Default configurations for ESP32-S3
CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240=y
CONFIG_ESP32S3_SPIRAM_SUPPORT=y
CONFIG_ESP32S3_DATA_CACHE_64KB=y
CONFIG_ESP32S3_DATA_CACHE_8WAYS=y
CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y

View File

@@ -1,15 +0,0 @@
# Component definition for the esp-nn test sources.
# The COMPONENT_* variables below are read by register_component().

# Headers in ./include/ are exported to components that use this one.
set(COMPONENT_ADD_INCLUDEDIRS ./include/)
set(COMPONENT_SRCS "src/basic_math_test.c"
"src/convolution_test.c"
"src/fully_connected_test.c"
"src/pooling_test.c"
"src/relu_test.c"
"src/softmax_test.c")
# No public requirements; esp-nn is only needed to build the tests themselves.
set(COMPONENT_REQUIRES )
set(COMPONENT_PRIV_REQUIRES esp-nn)
register_component()
# Suppress unused-function warnings for this component's sources only.
target_compile_options(${COMPONENT_LIB} PRIVATE -Wno-unused-function)

View File

@@ -1,4 +0,0 @@
# Tests for esp_nn library
- Include these in your test framework and run the framework.
- For an ESP-IDF based test, please refer to `test_app`.

View File

@@ -1,5 +0,0 @@
#FIXME
COMPONENT_ADD_INCLUDEDIRS := include/
COMPONENT_SRCDIRS := src/

View File

@@ -1,48 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
 * Entry points of the esp-nn test routines.
 *
 * Guarded against multiple inclusion, and declared with `(void)` so the
 * compiler checks that callers pass no arguments.
 */
#ifndef ESP_NN_TEST_FUNCTIONS_H_
#define ESP_NN_TEST_FUNCTIONS_H_

/* int8_t ops tests */
void esp_nn_add_elementwise_s8_test(void);
void esp_nn_mul_elementwise_s8_test(void);
void esp_nn_depthwise_conv_s8_test(void);
void esp_nn_conv_s8_test(void);
void esp_nn_avg_pool_s8_test(void);
void esp_nn_max_pool_s8_test(void);
void esp_nn_fully_connected_s8_test(void);
void esp_nn_relu6_s8_test(void);
void esp_nn_softmax_s8_test(void);

/* uint8_t ops tests */
void esp_nn_add_elementwise_u8_test(void);
void esp_nn_depthwise_conv_u8_test(void);
void esp_nn_conv_u8_test(void);
void esp_nn_avg_pool_u8_test(void);
void esp_nn_max_pool_u8_test(void);
void esp_nn_fully_connected_u8_test(void);

/* instructions test functions */
void compare_instructions_test(void);
void arith_instructions_test(void);
void min_max_instructions_test(void);
void bitwise_instructions_test(void);
void load_store_instructions_test(void);

#endif /* ESP_NN_TEST_FUNCTIONS_H_ */

View File

@@ -1,87 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdbool.h>
#include <common_functions.h>
#include <stdio.h>
/* Range of multiplier values used by the tests */
#define MULT_MAX    (INT32_MAX)
#define MULT_MIN    (0)

/* Range of shift values used by the tests */
#define SHIFT_MIN   (-31)
#define SHIFT_MAX   (30)
/*
 * Profiling callbacks. They are only declared here; the code that includes
 * this header is expected to provide the definitions.
 */
/**
* @brief callback function to run before C function
*/
void profile_c_start();
/**
* @brief callback function to run after C function
*/
void profile_c_end();
/**
* @brief callback function to run before optimized function
*/
void profile_opt_start();
/**
* @brief callback function to run after optimized function
*/
void profile_opt_end();
/*
 * ANSI escape sequences for coloured terminal output. They are string
 * literals meant to be concatenated around a message; ANSI_COLOR_RESET
 * ("\x1b[0m") restores the default attributes.
 */
#define ANSI_COLOR_RED "\x1b[31m"
#define ANSI_COLOR_GREEN "\x1b[32m"
#define ANSI_COLOR_YELLOW "\x1b[33m"
#define ANSI_COLOR_BLUE "\x1b[34m"
#define ANSI_COLOR_MAGENTA "\x1b[35m"
#define ANSI_COLOR_CYAN "\x1b[36m"
#define ANSI_COLOR_RESET "\x1b[0m"
/**
 * @brief Compare the first `size` elements of two arrays.
 *
 * GNU statement expression; evaluates to `true` when all compared elements are
 * equal and to `false` at the first mismatch. Arguments are parenthesized so
 * that expressions such as `buf + 1` or `cond ? a : b` can be passed safely.
 * Note that the arguments are evaluated once per loop iteration.
 */
#define CHECK_EQUAL(ARRAY1, ARRAY2, size) ({            \
    bool _all_equal = true;                             \
    for (int _i = 0; _i < (size); _i++) {               \
        if ((ARRAY1)[_i] != (ARRAY2)[_i]) {             \
            _all_equal = false;                         \
            break;                                      \
        }                                               \
    }                                                   \
    _all_equal;                                         \
})
/**
 * @brief Print `height` rows of `width` ints from ARRAY, tab separated.
 *
 * ARRAY is reinterpreted as `int *` and indexed row-major
 * (`width * row + column`); a blank line follows the last row.
 * Arguments are parenthesized so expressions (e.g. `n / rows`) index correctly.
 */
#define PRINT_ARRAY_INT(ARRAY, width, height) ({        \
    int *_array = (int *) (ARRAY);                      \
    for (int _j = 0; _j < (height); _j++) {             \
        for (int _i = 0; _i < (width); _i++) {          \
            printf("%d\t", _array[(width) * _j + _i]);  \
        }                                               \
        printf("\n");                                   \
    }                                                   \
    printf("\n");                                       \
})
/**
 * @brief Print `height` rows of `width` bytes from ARRAY as two-digit hex, tab separated.
 *
 * ARRAY is reinterpreted as `uint8_t *` and indexed row-major
 * (`width * row + column`); a blank line follows the last row.
 * Arguments are parenthesized so expressions (e.g. `n / rows`) index correctly.
 */
#define PRINT_ARRAY_HEX(ARRAY, width, height) ({        \
    uint8_t *_array = (uint8_t *) (ARRAY);              \
    for (int _j = 0; _j < (height); _j++) {             \
        for (int _i = 0; _i < (width); _i++) {          \
            printf("%02x\t", _array[(width) * _j + _i]); \
        }                                               \
        printf("\n");                                   \
    }                                                   \
    printf("\n");                                       \
})

View File

@@ -1,355 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <common_functions.h>
#include <esp_nn.h>
#include "test_utils.h"
#if CONFIG_IDF_CMAKE
#if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC))
#define IDF_HEAP_CAPS 1
#endif
#if IDF_HEAP_CAPS
#include "esp_heap_caps.h"
#endif
#endif
/**
 * @brief Compare esp_nn_add_elementwise_s8() with esp_nn_add_elementwise_s8_ansi().
 *
 * Runs 10 iterations. Iterations 0-3 use fixed corner-case parameters, the
 * remaining ones use random offsets, multipliers and shifts. Both functions
 * are fed the same random inputs and their outputs are compared element by
 * element; a pass/fail line is printed per iteration. The profile hooks are
 * only invoked on iteration 0.
 *
 * The cleanup label sits inside the loop: after a failed allocation or a
 * mismatch the buffers are freed and the loop carries on with the next iteration.
 */
void esp_nn_add_elementwise_s8_test()
{
/* prepare data */
const int size = 1600 + 8 + 7; /* odd len to test leftover */
int8_t *input1;
int8_t *input2;
int8_t *out_data_c;
int8_t *out_data_opt;
/* the *_orig pointers hold what the allocator returned and are the ones passed to free() */
int8_t *input1_orig = NULL;
int8_t *input2_orig = NULL;
int8_t *out_c_orig = NULL;
int8_t *out_opt_orig = NULL;
int32_t input1_offset = 34;
int32_t input2_offset = 35;
int32_t output_offset = 36;
int32_t input1_shift = -8; // right_shift amt always <= 0
int32_t input2_shift = -8; // right_shift amt always <= 0
int32_t output_shift = -9; // right_shift amt always <= 0
int32_t left_shift = 15; // always +ve
int32_t input1_mult = INT32_MAX;
int32_t input2_mult = INT32_MAX;
int32_t output_mult = INT32_MAX;
int32_t activation_min = -128;
int32_t activation_max = 127;
for (int itr = 0; itr < 10; itr++) {
switch (itr) {
case 0: // all zeros
input1_offset = 0;
input2_offset = 0;
output_offset = 0;
input1_mult = 0;
input2_mult = 0;
output_mult = 0;
input1_shift = 0;
input2_shift = 0;
output_shift = 0;
left_shift = 0;
break;
case 1: // hit min
input1_offset = -127;
input2_offset = -127;
output_offset = -128;
input1_mult = MULT_MIN;
input2_mult = MULT_MIN;
output_mult = MULT_MIN;
input1_shift = 0;
input2_shift = 0;
output_shift = 0;
left_shift = 0;
break;
case 2: // hit max
input1_offset = 128;
input2_offset = 128;
output_offset = -127;
input1_mult = MULT_MAX;
input2_mult = MULT_MAX;
output_mult = MULT_MAX;
input1_shift = SHIFT_MIN;
input2_shift = SHIFT_MIN;
output_shift = SHIFT_MIN;
left_shift = 30 - 8; // since input is 8 bits
break;
case 3: // hit extreme max
input1_offset = 128;
input2_offset = 128;
output_offset = -127;
input1_mult = MULT_MAX;
input2_mult = MULT_MAX;
output_mult = MULT_MAX;
input1_shift = 0;
input2_shift = 0;
output_shift = 0;
left_shift = 30 - 8; // -8 since input is 8 bit
break;
default: // practical random input
input1_offset = rand() % 256 - 127; // range [-127, 128]
input2_offset = rand() % 256 - 127; // range [-127, 128]
output_offset = rand() % 256 - 128; // range [-128, 127]
input1_mult = MULT_MAX / 2 + rand() % INT16_MAX;
input2_mult = MULT_MAX / 2 + rand() % INT16_MAX;
output_mult = MULT_MAX / 2 + rand() % INT16_MAX;
input1_shift = -8 + rand() % 4;
input2_shift = -8 + rand() % 4;
output_shift = -8 + rand() % 4;
left_shift = rand() % 15;
}
#if IDF_HEAP_CAPS
/* over-allocate by 32 bytes, then advance each pointer to the next 16-byte boundary */
input1_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
input2_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
out_c_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
out_opt_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
input1 = 16 + input1_orig - ((uint32_t) input1_orig & 0xf);
input2 = 16 + input2_orig - ((uint32_t) input2_orig & 0xf);
out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf);
out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf);
#else
/* memalign already returns 16-byte aligned memory, so the working and original pointers coincide */
input1 = memalign(16, size);
input2 = memalign(16, size);
out_data_c = memalign(16, size);
out_data_opt = memalign(16, size);
input1_orig = input1;
input2_orig = input2;
out_c_orig = out_data_c;
out_opt_orig = out_data_opt;
#endif
/* the check uses the *_orig pointers: the aligned ones are non-NULL even after a failed allocation */
if (input1_orig == NULL || input2_orig == NULL || out_c_orig == NULL ||
out_opt_orig == NULL) {
printf(ANSI_COLOR_RED"%s error allocating buffers\n"ANSI_COLOR_RESET, __FUNCTION__);
goto elementwise_add_test_cleanup;
}
/* random input data in [-128, 127] */
for (int i = 0; i < size; ++i) {
input1[i] = rand() % 256 - 128;
input2[i] = rand() % 256 - 128;
}
if (itr == 0) {
/* enable profiler */
profile_c_start();
}
/* C function */
esp_nn_add_elementwise_s8_ansi(input1, input2, input1_offset, input2_offset,
input1_mult, input2_mult, input1_shift, input2_shift,
left_shift, out_data_c, output_offset, output_mult,
output_shift, activation_min, activation_max, size);
if (itr == 0) {
profile_c_end();
profile_opt_start();
}
/* Optimized function */
esp_nn_add_elementwise_s8(input1, input2, input1_offset, input2_offset,
input1_mult, input2_mult, input1_shift, input2_shift,
left_shift, out_data_opt, output_offset, output_mult,
output_shift, activation_min, activation_max, size);
if (itr == 0) {
/* disable profiler */
profile_opt_end();
}
/* the two outputs must match exactly; on mismatch dump everything needed to reproduce */
bool ret = CHECK_EQUAL(out_data_c, out_data_opt, size);
if (ret == false) {
printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
printf("Output: \n");
PRINT_ARRAY_HEX(out_data_opt, size, 1);
printf("Expected: \n");
PRINT_ARRAY_HEX(out_data_c, size, 1);
printf("Input1:\n");
PRINT_ARRAY_HEX(input1, size, 1);
printf("Input2:\n");
PRINT_ARRAY_HEX(input2, size, 1);
printf("in1_shift %d, in2_shift %d, left_shift %d, out_shift %d\n",
input1_shift, input2_shift, left_shift, output_shift);
printf("in1_mult %d, in2_mult %d, out_mult %d\n", input1_mult, input2_mult, output_mult);
goto elementwise_add_test_cleanup;
}
printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
/* reached on every iteration (pass, mismatch or allocation failure); the loop then continues */
elementwise_add_test_cleanup:
if (input1_orig) {
free(input1_orig);
}
if (input2_orig) {
free(input2_orig);
}
if (out_c_orig) {
free(out_c_orig);
}
if (out_opt_orig) {
free(out_opt_orig);
}
}
}
/**
 * @brief Compare esp_nn_mul_elementwise_s8() with esp_nn_mul_elementwise_s8_ansi().
 *
 * Runs 10 iterations. Iterations 0-3 use fixed corner-case parameters, the
 * remaining ones use random offsets, multiplier and shift. Both functions are
 * fed the same random inputs and their outputs are compared element by
 * element; a pass/fail line is printed per iteration. The profile hooks are
 * only invoked on iteration 0.
 *
 * The cleanup label sits inside the loop: after a failed allocation or a
 * mismatch the buffers are freed and the loop carries on with the next iteration.
 */
void esp_nn_mul_elementwise_s8_test()
{
/* prepare data */
const int size = 1600 + 8 + 7; /* odd len to test leftover */
int8_t *input1;
int8_t *input2;
int8_t *out_data_c;
int8_t *out_data_opt;
int32_t input1_offset = 34;
int32_t input2_offset = 35;
int32_t output_offset = 36;
int32_t output_shift = -7;
int32_t output_mult = MULT_MAX; // max out_mult
int32_t activation_min = -128;
int32_t activation_max = 127;
/* the *_orig pointers hold what the allocator returned and are the ones passed to free() */
int8_t *input1_orig = NULL;
int8_t *input2_orig = NULL;
int8_t *out_c_orig = NULL;
int8_t *out_opt_orig = NULL;
for (int itr = 0; itr < 10; itr++) {
switch (itr) {
case 0: // all zeros
input1_offset = 0;
input2_offset = 0;
output_offset = 0;
output_mult = 0;
output_shift = 0;
break;
case 1: // hit min
input1_offset = -127;
input2_offset = -127;
output_offset = -128;
output_mult = MULT_MIN;
output_shift = 0;
break;
case 2: // hit max
input1_offset = 128;
input2_offset = 128;
output_offset = -127;
output_mult = MULT_MAX;
output_shift = SHIFT_MIN;
break;
case 3: // hit extreme max
input1_offset = 128;
input2_offset = 128;
output_offset = -127;
output_mult = MULT_MAX;
output_shift = 0;
break;
default: // practical random input
input1_offset = rand() % 256 - 127; // range [-127, 128]
input2_offset = rand() % 256 - 127; // range [-127, 128]
output_offset = rand() % 256 - 128; // range [-128, 127]
output_mult = MULT_MAX / 2 + rand() % INT16_MAX;
output_shift = -8 + rand() % 4;
}
#if IDF_HEAP_CAPS
/* over-allocate by 32 bytes, then advance each pointer to the next 16-byte boundary */
input1_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
input2_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
out_c_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
out_opt_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
input1 = 16 + input1_orig - ((uint32_t) input1_orig & 0xf);
input2 = 16 + input2_orig - ((uint32_t) input2_orig & 0xf);
out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf);
out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf);
#else
/* memalign already returns 16-byte aligned memory, so the working and original pointers coincide */
input1 = memalign(16, size);
input2 = memalign(16, size);
out_data_c = memalign(16, size);
out_data_opt = memalign(16, size);
input1_orig = input1;
input2_orig = input2;
out_c_orig = out_data_c;
out_opt_orig = out_data_opt;
#endif
/* the check uses the *_orig pointers: the aligned ones are non-NULL even after a failed allocation */
if (input1_orig == NULL || input2_orig == NULL || out_c_orig == NULL ||
out_opt_orig == NULL) {
printf(ANSI_COLOR_RED"%s error allocating buffers\n"ANSI_COLOR_RESET, __FUNCTION__);
goto elementwise_mult_test_cleanup;
}
/* random input data in [-128, 127] */
for (int i = 0; i < size; ++i) {
input1[i] = rand() % 256 - 128;
input2[i] = rand() % 256 - 128;
}
if (itr == 0) {
/* enable profiler */
profile_c_start();
}
/* C function */
esp_nn_mul_elementwise_s8_ansi(input1, input2, input1_offset, input2_offset,
out_data_c, output_offset, output_mult, output_shift,
activation_min, activation_max, size);
if (itr == 0) {
profile_c_end();
profile_opt_start();
}
/* Optimized function */
esp_nn_mul_elementwise_s8(input1, input2, input1_offset, input2_offset,
out_data_opt, output_offset, output_mult, output_shift,
activation_min, activation_max, size);
if (itr == 0) {
/* disable profiler */
profile_opt_end();
}
/* the two outputs must match exactly; on mismatch dump outputs and inputs */
bool ret = CHECK_EQUAL(out_data_c, out_data_opt, size);
if (ret == false) {
printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
printf("Output: \n");
PRINT_ARRAY_HEX(out_data_opt, size, 1);
printf("Expected: \n");
PRINT_ARRAY_HEX(out_data_c, size, 1);
printf("Input1:\n");
PRINT_ARRAY_HEX(input1, size, 1);
printf("Input2:\n");
PRINT_ARRAY_HEX(input2, size, 1);
goto elementwise_mult_test_cleanup;
}
printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
/* reached on every iteration (pass, mismatch or allocation failure); the loop then continues */
elementwise_mult_test_cleanup:
if (input1_orig) {
free(input1_orig);
}
if (input2_orig) {
free(input2_orig);
}
if (out_c_orig) {
free(out_c_orig);
}
if (out_opt_orig) {
free(out_opt_orig);
}
}
}

View File

@@ -1,605 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <esp_nn.h>
#include "test_utils.h"
#if CONFIG_IDF_CMAKE
#if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC))
#define IDF_HEAP_CAPS 1
#endif
#if IDF_HEAP_CAPS
#include "esp_heap_caps.h"
#endif
#endif
/**
 * @brief Compare esp_nn_depthwise_conv_s8() with esp_nn_depthwise_conv_s8_ansi().
 *
 * Runs 15 iterations. Iterations 0-9 use fixed shapes, the remaining ones use
 * a fixed shape with random stride/padding. Both functions receive the same
 * random input, filter, bias and quantization data, and their outputs are
 * compared element by element; a pass/fail line is printed per iteration.
 * The profile hooks are only invoked on iteration 0.
 *
 * The cleanup label sits inside the loop: after a failed allocation or a
 * mismatch all buffers are released and the loop carries on with the next
 * iteration. Every buffer pointer is reset after being freed so that no
 * iteration can see (or free again) memory released by a previous one.
 */
void esp_nn_depthwise_conv_s8_test()
{
    int8_t *input = NULL, *filter_data = NULL, *out_data_c = NULL, *out_data_opt = NULL;
    int32_t *bias = NULL;
    int32_t input_offset = 5; /* some number in [-128, 127] */
    int32_t out_offset = 7;
    int32_t activation_min = -125;
    int32_t activation_max = 120;
    void *scratch_buf = NULL;

    /* independent variables */
    int input_wd, input_ht, channels;
    uint16_t filter_ht, filter_wd, ch_mult;
    uint16_t pad_wd, pad_ht, stride_wd, stride_ht;

    // run for 15 iterations
    for (int itr = 0; itr < 15; itr++) {
        /* prepare data */
        switch (itr) {
        case 0: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0)
            input_wd = 18;
            input_ht = 18;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 1;
            channels = 16;
            pad_wd = 0;
            pad_ht = 0;
            stride_wd = 1;
            stride_ht = 1;
            break;
        case 1: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (1,1)
            input_wd = 10;
            input_ht = 10;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 1;
            channels = 16;
            pad_wd = 1;
            pad_ht = 1;
            stride_wd = 1;
            stride_ht = 1;
            break;
        case 2: // (ch_mult 1, (channels % 8) = 0), filter (3,3), pad (1,1)
            input_wd = 10;
            input_ht = 10;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 1;
            channels = 24;
            pad_wd = 1;
            pad_ht = 1;
            stride_wd = 1;
            stride_ht = 1;
            break;
        case 3: // other filter sizes (ch_mult 1, (channels % 8) = 0)
            input_wd = 10;
            input_ht = 10;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 1;
            channels = 24;
            pad_wd = 1;
            pad_ht = 1;
            stride_wd = 1;
            stride_ht = 1;
            break;
        case 4: // other filter sizes (ch_mult 8 = 0)
            input_wd = 6;
            input_ht = 6;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 8;
            channels = 4;
            pad_wd = 1;
            pad_ht = 1;
            stride_wd = 1;
            stride_ht = 1;
            break;
        case 5: // other filter sizes (ch_mult 8 = 0)
            input_wd = 12;
            input_ht = 12;
            filter_ht = 5;
            filter_wd = 5;
            ch_mult = 8;
            channels = 4;
            pad_wd = 1;
            pad_ht = 1;
            stride_wd = 1;
            stride_ht = 1;
            break;
        case 6: // other filter sizes (ch_mult 4 = 0)
            input_wd = 6;
            input_ht = 6;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 4;
            channels = 4;
            pad_wd = 1;
            pad_ht = 1;
            stride_wd = 1;
            stride_ht = 1;
            break;
        case 7: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2)
            input_wd = 6;
            input_ht = 6;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 1;
            channels = 16;
            pad_wd = 0;
            pad_ht = 0;
            stride_wd = 2;
            stride_ht = 2;
            break;
        case 8: // same as case 7, with large parameters
            input_wd = 58;
            input_ht = 58;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 1;
            channels = 128;
            pad_wd = 0;
            pad_ht = 0;
            stride_wd = 2;
            stride_ht = 2;
            break;
        case 9: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2)
            input_wd = 6;
            input_ht = 6;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 1;
            channels = 16;
            pad_wd = 0;
            pad_ht = 0;
            stride_wd = 2;
            stride_ht = 2;
            break;
        default:
            input_wd = 6;
            input_ht = 6;
            filter_ht = 3;
            filter_wd = 3;
            ch_mult = 1;
            channels = 16;
            stride_wd = rand() % 2 + 1;
            stride_ht = stride_wd;
            pad_wd = stride_wd == 1 ? 0 : rand() % 2;
            pad_ht = pad_wd;
            printf("stride(%d), pad (%d)\t", stride_wd, pad_wd);
            break;
        }

        /* NOTE(review): the output size below does not take the padding into account */
        uint16_t out_wd = (input_wd - filter_wd + 1) / stride_wd;
        uint16_t out_ht = (input_ht - filter_ht + 1) / stride_ht;
        if (itr == 9) {
            // expect the function to handle this gracefully
            out_wd += 1;
            out_ht += 1;
        }
        int in_size = input_wd * input_ht * channels;
        int out_size = out_wd * out_ht * channels * ch_mult;
        /* filter and bias get a few spare leading elements so the kernels can be
         * handed deliberately offset pointers (filter_data + 4, bias + 1) */
        int filter_size = filter_wd * filter_ht * channels * ch_mult + 4;
        int bias_size = channels * ch_mult + 1;
        int32_t out_shift[channels * ch_mult];
        int32_t out_mult[channels * ch_mult];

#if IDF_HEAP_CAPS
        int8_t *input_orig = (int8_t *) heap_caps_malloc(in_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
        int8_t *out_c_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
        int8_t *out_opt_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
        filter_data = (int8_t *) heap_caps_malloc(filter_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
        bias = (int32_t *) heap_caps_malloc(bias_size * 4, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
#else
        int8_t *input_orig = memalign(16, in_size + 16);
        filter_data = memalign(16, filter_size);
        int8_t *out_c_orig = memalign(16, out_size + 16);
        int8_t *out_opt_orig = memalign(16, out_size + 16);
        bias = memalign(16, bias_size * 4);
#endif
        /* Check what the allocator returned: the aligned pointers derived below
         * would be non-NULL even after a failed allocation. */
        if (input_orig == NULL || out_c_orig == NULL || out_opt_orig == NULL ||
                filter_data == NULL || bias == NULL) {
            printf(ANSI_COLOR_RED"%s[%d] allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
            goto dc_s8_cleanup;
        }
#if IDF_HEAP_CAPS
        /* advance each pointer to the next 16-byte boundary inside its over-sized buffer */
        input = 16 + input_orig - ((uintptr_t) input_orig & 0xf);
        out_data_c = 16 + out_c_orig - ((uintptr_t) out_c_orig & 0xf);
        out_data_opt = 16 + out_opt_orig - ((uintptr_t) out_opt_orig & 0xf);
#else
        /* memalign already returned 16-byte aligned memory */
        input = input_orig;
        out_data_c = out_c_orig;
        out_data_opt = out_opt_orig;
#endif

        /* Generate input data */
        for (int i = 0; i < in_size; ++i) {
            input[i] = rand() % 128;
        }

        /* Generate filter data */
        for (int i = 0; i < filter_size; ++i) {
            filter_data[i] = rand() % 256 - 128;
        }

        /* Generate bias data */
        for (int i = 0; i < channels * ch_mult; ++i) {
            bias[i + 1] = rand() % INT16_MAX; //0th index left for unalignment
            out_shift[i] = -8 + rand() % 3;
            out_mult[i] = 0x7eb0e200 + rand() % 50;
        }

        data_dims_t input_dims = {.width = input_wd, .height = input_ht, .channels = channels, 1};
        data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = channels * ch_mult, 1};
        data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0};
        dw_conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset, .ch_mult = ch_mult,
                                        .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht},
                                        .dilation = {0, 0}, .activation = {activation_min, activation_max}};
        quant_data_t quant_data = {.shift = out_shift, .mult = out_mult};

        int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size(&input_dims, &filter_dims,
                                                                      &output_dims, &conv_params);
        if (scratch_buf_size > 0) {
#if IDF_HEAP_CAPS
            scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
            int align_sz = 16 - (int) ((uintptr_t) scratch_buf & 0xf);
#else
            scratch_buf = memalign(16, scratch_buf_size);
            int align_sz = 0;
#endif
            if (scratch_buf == NULL) {
                printf(ANSI_COLOR_RED"%s[%d] scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET,
                       __FUNCTION__, itr, scratch_buf_size);
                goto dc_s8_cleanup;
            }
            /* byte-wise offset; avoids arithmetic on a void pointer */
            esp_nn_set_depthwise_conv_scratch_buf((uint8_t *) scratch_buf + align_sz);
        }
        if (itr == 0) {
            /* enable profiler */
            profile_c_start();
        }

        /* C function */
        esp_nn_depthwise_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 4,
                                      bias + 1, &output_dims, out_data_c, &conv_params, &quant_data);

        if (itr == 0) {
            profile_c_end();
            profile_opt_start();
        }

        /* Optimized function */
        esp_nn_depthwise_conv_s8(&input_dims, input, &filter_dims, filter_data + 4,
                                 bias + 1, &output_dims, out_data_opt, &conv_params, &quant_data);

        if (itr == 0) {
            /* disable profiler */
            profile_opt_end();
        }

        bool ret = CHECK_EQUAL(out_data_c, out_data_opt, out_size);
        if (ret == false) {
            printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
            printf("Output: \n");
            PRINT_ARRAY_HEX(out_data_opt, out_size / out_ht, out_ht);
            printf("Expected: \n");
            PRINT_ARRAY_HEX(out_data_c, out_size / out_ht, out_ht);
            printf("Input:\n");
            PRINT_ARRAY_HEX(input, in_size / input_ht, input_ht);
            printf("Filter data:\n");
            PRINT_ARRAY_HEX(filter_data + 4, (filter_size - 4) / filter_ht, filter_ht);
            printf("bias data:\n");
            PRINT_ARRAY_INT(bias + 1, ch_mult * channels, 1);
            goto dc_s8_cleanup;
        }
        printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);

dc_s8_cleanup:
        /* free(NULL) is a no-op, so partially failed allocations are handled too */
        free(input_orig);
        free(out_c_orig);
        free(out_opt_orig);
        free(filter_data);
        free(bias);
        free(scratch_buf);

        /* Reset everything that outlives this iteration. scratch_buf in particular
         * is only re-assigned when a scratch buffer is needed, so leaving the stale
         * pointer in place would cause a double free on a later iteration. */
        input = NULL;
        out_data_c = NULL;
        out_data_opt = NULL;
        filter_data = NULL;
        bias = NULL;
        scratch_buf = NULL;
    }
}
void esp_nn_conv_s8_test()
{
const int32_t input_offset = 5; /* some number in [-128, 127] */
const int32_t activation_min = -125;
const int32_t activation_max = 122;
const int32_t out_offset = 3;
void *scratch_buf = NULL;
int8_t *input_orig;
int8_t *out_c_orig;
int8_t *out_opt_orig;
int8_t *filter_data;
int32_t *bias;
/* independent variable */
int in_wd, in_ht, in_channels, out_channels;
uint16_t filter_ht, filter_wd;
uint16_t pad_wd, pad_ht, stride_wd, stride_ht;
// run for 10 iterations
for (int itr = 0; itr < 10; itr++) {
switch (itr) {
case 0: // ch % 8 == 0 && filter (1,1), padding (0,0)
in_wd = 10;
in_ht = 10;
in_channels = 64;
out_channels = 64;
filter_ht = 1;
filter_wd = 1;
pad_wd = 0;
pad_ht = 0;
stride_wd = 1;
stride_ht = 1;
break;
case 1: // ch % 4 == 0 && (in_wd * in_ht) % 16 == 0
in_wd = 4;
in_ht = 4;
in_channels = 20;
out_channels = 8;
filter_ht = 1;
filter_wd = 1;
pad_wd = 0;
pad_ht = 0;
stride_wd = 1;
stride_ht = 1;
break;
case 2: // ch, filter (3x3x3)
in_wd = 10;
in_ht = 10;
in_channels = 3;
out_channels = 64;
filter_ht = 3;
filter_wd = 3;
pad_wd = 0;
pad_ht = 0;
stride_wd = 1;
stride_ht = 1;
break;
case 3: // remaining pad (0, 0)
in_wd = 10;
in_ht = 10;
in_channels = 3;
out_channels = 64;
filter_ht = 1;
filter_wd = 1;
pad_wd = 0;
pad_ht = 0;
stride_wd = 1;
stride_ht = 1;
break;
case 4: // unopt case
in_wd = 10;
in_ht = 10;
in_channels = 12;
out_channels = 64;
filter_ht = 3;
filter_wd = 3;
pad_wd = 1;
pad_ht = 1;
stride_wd = 1;
stride_ht = 1;
break;
case 5: // ch % 8 == 0 & stride (2,2)
in_wd = 16;
in_ht = 16;
in_channels = 16;
out_channels = 16;
filter_ht = 1;
filter_wd = 1;
pad_wd = 0;
pad_ht = 0;
stride_wd = 2;
stride_ht = 2;
break;
case 6: // ch % 8 == 0 && filter (1,1), padding (0,0)
in_wd = 2;
in_ht = 2;
in_channels = 8;
out_channels = 8;
filter_ht = 1;
filter_wd = 1;
pad_wd = 0;
pad_ht = 0;
stride_wd = 1;
stride_ht = 1;
break;
default: // ch % 8 == 0
in_wd = 8;
in_ht = 8;
in_channels = 16;
out_channels = 16;
filter_ht = 1;
filter_wd = 1;
pad_wd = 0;
pad_ht = 0;
stride_wd = 1;
stride_ht = 1;
break;
}
/* prepare data */
uint16_t out_wd = (in_wd - filter_wd + 1) / stride_wd;
uint16_t out_ht = (in_ht - filter_ht + 1) / stride_ht;
int in_size = in_wd * in_ht * in_channels;
int filter_size = filter_wd * filter_ht * in_channels * out_channels + 2;
int out_size = out_wd * out_ht * out_channels;
#if IDF_HEAP_CAPS
input_orig = (int8_t *) heap_caps_malloc(in_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
out_c_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
out_opt_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
filter_data = (int8_t *) heap_caps_malloc(filter_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
bias = (int32_t *) heap_caps_malloc(128 + sizeof (int32_t) * out_channels, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
int8_t *input = 16 + input_orig - ((uint32_t) input_orig & 0xf);
int8_t *out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf);
int8_t *out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf);
#else
int8_t *input = memalign(16, in_size);
int8_t *out_data_c = memalign(16, out_size);
int8_t *out_data_opt = memalign(16, out_size);
filter_data = memalign(16, filter_size);
bias = calloc(1, 128 + sizeof (int32_t) * out_channels);
input_orig = input;
out_c_orig = out_data_c;
out_opt_orig = out_data_opt;
#endif
int32_t *out_shift = calloc(1, 128 + sizeof (int32_t) * out_channels);
int32_t *out_mult = calloc(1, 128 + sizeof (int32_t) * out_channels);
if (input == NULL || filter_data == NULL ||
out_data_c == NULL || out_data_opt == NULL) {
printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
goto conv_s8_cleanup;
}
if (bias == NULL || out_shift == NULL || out_mult == NULL) {
printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
goto conv_s8_cleanup;
}
/* Generate input data between -128 -> +127 */
for (int i = 0; i < in_size; ++i) {
input[i] = rand() % 255 - 128;
}
/* Generate filter data between -128 -> +127 */
for (int i = 0; i < filter_size; ++i) {
filter_data[i] = rand() % 256 - 128;
}
/* Generate bias data */
for (int i = 0; i < out_channels; ++i) {
bias[i] = (int32_t)rand() % UINT16_MAX + UINT8_MAX;
}
/* Shift and multiplier */
for (int i = 0; i < out_channels; ++i) {
out_shift[i] = -10 + rand() % 2;
out_mult[i] = 0x7f67f4f8 + rand() % 50;
}
data_dims_t input_dims = {.width = in_wd, .height = in_ht, .channels = in_channels, 1};
data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = out_channels, 1};
data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0};
conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset,
.stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht},
.dilation = {0, 0}, .activation = {activation_min, activation_max}};
quant_data_t quant_data = {.shift = out_shift, .mult = out_mult};
int scratch_buf_size = esp_nn_get_conv_scratch_size(&input_dims, &filter_dims,
&output_dims, &conv_params);
if (scratch_buf_size > 0) {
#if IDF_HEAP_CAPS
void *scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
int align_sz = 16 - (((int32_t) scratch_buf) & 0xf);
#else
void *scratch_buf = memalign(16, scratch_buf_size);
int align_sz = 0;
#endif
if (scratch_buf == NULL) {
printf(ANSI_COLOR_RED"%s scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET, __FUNCTION__, scratch_buf_size);
goto conv_s8_cleanup;
}
esp_nn_set_conv_scratch_buf(scratch_buf + align_sz);
}
if (itr == 0) {
/* enable profiler */
profile_c_start();
}
/* C function */
esp_nn_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 2,
bias, &output_dims, out_data_c, &conv_params, &quant_data);
if (itr == 0) {
profile_c_end();
profile_opt_start();
}
/* Optimized function */
esp_nn_conv_s8(&input_dims, input, &filter_dims, filter_data + 2,
bias, &output_dims, out_data_opt, &conv_params, &quant_data);
if (itr == 0) {
/* disable profiler */
profile_opt_end();
}
bool ret = CHECK_EQUAL(out_data_c, out_data_opt, out_size);
if (ret == false) {
printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
printf("Output: \n");
PRINT_ARRAY_HEX(out_data_opt, out_size / out_ht, out_ht);
printf("Expected: \n");
PRINT_ARRAY_HEX(out_data_c, out_size / out_ht, out_ht);
printf("Input:\n");
PRINT_ARRAY_HEX(input, in_size / in_ht, in_ht);
printf("Filter data:\n");
PRINT_ARRAY_HEX(filter_data + 2, (filter_size - 2) / filter_ht, filter_ht);
printf("bias data:\n");
PRINT_ARRAY_INT(bias, out_channels, 1);
goto conv_s8_cleanup;
}
printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
conv_s8_cleanup:
if (input) {
free(input_orig);
}
if (filter_data) {
free(filter_data);
}
if (out_data_c) {
free(out_c_orig);
}
if (out_data_opt) {
free(out_opt_orig);
}
if (bias) {
free(bias);
}
if (out_shift) {
free(out_shift);
}
if (out_mult) {
free(out_mult);
}
if (scratch_buf) {
free(scratch_buf);
}
}
}

View File

@@ -1,111 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <esp_nn.h>
#include "test_utils.h"
/**
 * Check esp_nn_fully_connected_s8 against the ANSI C reference implementation.
 *
 * Runs five iterations. Each iteration draws a fresh output multiplier and
 * fresh random input/filter data, and uses a different output shift:
 *   itr 0: -10, itr 1: SHIFT_MIN, itr 2: SHIFT_MAX, itr 3: 0,
 *   itr 4: random value in [-10, -6].
 * The first iteration is additionally wrapped in the profile_* hooks.
 *
 * The sixth argument of both kernels is passed as NULL
 * (NOTE(review): presumably the bias pointer - confirm against esp_nn.h).
 *
 * On the first mismatch the buffers and quantization parameters are dumped
 * and the function returns without running the remaining iterations.
 */
void esp_nn_fully_connected_s8_test()
{
    /* prepare data */
    static const uint16_t row_len = 256 + 8 + 7; /* odd len to test unaligned+left-over */
    static const uint16_t out_channels = 3;
    static const int32_t activation_min = -128;
    static const int32_t activation_max = 127;
    static const int32_t input_offset = 0;
    static const int32_t filter_offset = 0;
    static const int32_t out_offset = 127;

    int8_t input[row_len];
    int8_t filter_data[row_len * out_channels];
    int8_t output_c[out_channels], output_opt[out_channels];

    for (int itr = 0; itr < 5; itr++) {
        /* The multiplier is drawn first so the rand() call order is stable. */
        const int32_t out_mult = INT32_MAX / row_len + rand() % INT16_MAX;
        int32_t out_shift;

        /* Each iteration exercises a distinct shift; iteration 0 really uses
         * -10, so SHIFT_MAX is covered once (iteration 2) and not twice. */
        switch (itr) {
        case 0:
            out_shift = -10;
            break;
        case 1:
            out_shift = SHIFT_MIN;
            break;
        case 2:
            out_shift = SHIFT_MAX;
            break;
        case 3:
            out_shift = 0;
            break;
        default:
            out_shift = -10 + rand() % 5;
            break;
        }

        /* Generate input and filter data between -128 -> +127 */
        for (int i = 0; i < row_len; ++i) {
            input[i] = rand() % 256 - 128;
        }
        for (int i = 0; i < row_len * out_channels; ++i) {
            filter_data[i] = rand() % 256 - 128;
        }

        if (itr == 0) {
            /* enable profiler */
            profile_c_start();
        }

        /* C function */
        esp_nn_fully_connected_s8_ansi(input, input_offset, row_len, filter_data, filter_offset,
                                       NULL, output_c, out_channels, out_offset, out_shift, out_mult,
                                       activation_min, activation_max);

        if (itr == 0) {
            profile_c_end();
            profile_opt_start();
        }

        /* Optimized function */
        esp_nn_fully_connected_s8(input, input_offset, row_len, filter_data, filter_offset,
                                  NULL, output_opt, out_channels, out_offset, out_shift, out_mult,
                                  activation_min, activation_max);

        if (itr == 0) {
            /* disable profiler */
            profile_opt_end();
        }

        bool ret = CHECK_EQUAL(output_c, output_opt, out_channels);
        if (ret == false) {
            printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
            printf("Output: \n");
            PRINT_ARRAY_HEX(output_opt, out_channels, 1);
            printf("Expected: \n");
            PRINT_ARRAY_HEX(output_c, out_channels, 1);
            printf("Input:\n");
            PRINT_ARRAY_HEX(input, row_len, 1);
            printf("Filter data:\n");
            PRINT_ARRAY_HEX(filter_data, row_len, out_channels);
            /* Casts keep the arguments matched to %d / %x even where
             * int32_t is not a plain int. */
            printf("Out shift: %d\n", (int) out_shift);
            printf("Out mult: %x\n", (unsigned int) out_mult);
            return;
        }
        printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr);
    }
}

View File

@@ -1,184 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <esp_nn.h>
#include "test_utils.h"
/**
 * Compare the optimized s8 average-pool kernel with the ANSI C reference.
 *
 * A 16x16 input with 16 channels is filled with random values in
 * -128..+127, both implementations are run with a 3x3 filter, stride 1 and
 * padding 1 (each run wrapped in the profile_* hooks), and the two outputs
 * are compared. On a mismatch the output, expected and input buffers are
 * dumped in hex.
 */
void esp_nn_avg_pool_s8_test()
{
    /* input tensor geometry */
    const uint16_t input_wd = 16;
    const uint16_t input_ht = 16;
    const uint16_t channels = 16; /* With TFLite example, I have seen it 256 */

    /* pooling window */
    const uint16_t filter_wd = 3;
    const uint16_t filter_ht = 3;
    const uint16_t stride_wd = 1;
    const uint16_t stride_ht = 1;
    const uint16_t pad_wd = 1;
    const uint16_t pad_ht = 1;

    /* activation range handed to both kernels */
    const int32_t activation_min = -128;
    const int32_t activation_max = 127;

    /* derived sizes */
    const uint16_t out_wd = input_wd / stride_wd;
    const uint16_t out_ht = input_ht / stride_ht;
    const int in_count = input_wd * input_ht * channels;
    const int out_count = out_wd * out_ht * channels;

    int8_t *const input = memalign(16, in_count);
    int8_t *const output_c = memalign(16, out_count);
    int8_t *const output_opt = memalign(16, out_count);

    if (!input || !output_c || !output_opt) {
        printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
    } else {
        /*
         * The width/height/channel values may look suspicious, but they are
         * realistic: they depend on where in the model the layer sits.
         * Towards the end, width/height tend to be smaller and depth larger.
         */
        for (int8_t *p = input; p != input + in_count; ++p) {
            *p = rand() % 256 - 128;
        }

        /* reference (C) run, profiled */
        profile_c_start();
        esp_nn_avg_pool_s8_ansi(input, input_wd, input_ht, output_c, out_wd, out_ht,
                                stride_wd, stride_ht, filter_wd, filter_ht, pad_wd, pad_ht,
                                activation_min, activation_max, channels);
        profile_c_end();

        /* optimized run, profiled */
        profile_opt_start();
        esp_nn_avg_pool_s8(input, input_wd, input_ht, output_opt, out_wd, out_ht,
                           stride_wd, stride_ht, filter_wd, filter_ht, pad_wd, pad_ht,
                           activation_min, activation_max, channels);
        profile_opt_end();

        const bool ret = CHECK_EQUAL(output_c, output_opt, out_count);
        if (ret) {
            printf(ANSI_COLOR_GREEN"%s passed\n"ANSI_COLOR_RESET, __FUNCTION__);
        } else {
            printf(ANSI_COLOR_RED"%s failed\n"ANSI_COLOR_RESET, __FUNCTION__);
            printf("Output: \n");
            PRINT_ARRAY_HEX(output_opt, out_wd * channels, out_ht);
            printf("Expected: \n");
            PRINT_ARRAY_HEX(output_c, out_wd * channels, out_ht);
            printf("Input:\n");
            PRINT_ARRAY_HEX(input, input_wd * channels, input_ht);
        }
    }

    /* free(NULL) is a no-op, so no per-pointer guards are needed */
    free(input);
    free(output_c);
    free(output_opt);
}
/**
 * Compare the optimized s8 max-pool kernel with the ANSI C reference.
 *
 * Uses a 16x16 input with 16 channels filled with random values in
 * -128..+127, a 3x3 filter, stride 1 and padding 1. Both runs are wrapped
 * in the profile_* hooks. On a mismatch the output and expected buffers are
 * dumped as a single row each, and the input is dumped 8 values per row.
 */
void esp_nn_max_pool_s8_test()
{
    /* input tensor geometry */
    const uint16_t input_wd = 16;
    const uint16_t input_ht = 16;
    const uint16_t channels = 16; /* With TFLite example, I have seen it 256 */

    /* pooling window */
    const uint16_t filter_wd = 3;
    const uint16_t filter_ht = 3;
    const uint16_t stride_wd = 1;
    const uint16_t stride_ht = 1;
    const uint16_t pad_wd = 1;
    const uint16_t pad_ht = 1;

    /* activation range handed to both kernels */
    const int32_t activation_min = -128;
    const int32_t activation_max = 127;

    /* derived sizes */
    const uint16_t out_wd = input_wd / stride_wd;
    const uint16_t out_ht = input_ht / stride_ht;
    const int size = input_wd * input_ht * channels;
    const int out_size = out_wd * out_ht * channels;

    int8_t *const input = memalign(16, size);
    int8_t *const output_c = memalign(16, out_size);
    int8_t *const output_opt = memalign(16, out_size);

    if (!input || !output_c || !output_opt) {
        printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
    } else {
        /* random input data between -128 -> +127 */
        for (int8_t *p = input; p != input + size; ++p) {
            *p = rand() % 256 - 128;
        }

        /* reference (C) run, profiled */
        profile_c_start();
        esp_nn_max_pool_s8_ansi(input, input_wd, input_ht, output_c, out_wd, out_ht,
                                stride_wd, stride_ht, filter_wd, filter_ht, pad_wd, pad_ht,
                                activation_min, activation_max, channels);
        profile_c_end();

        /* optimized run, profiled */
        profile_opt_start();
        esp_nn_max_pool_s8(input, input_wd, input_ht, output_opt, out_wd, out_ht,
                           stride_wd, stride_ht, filter_wd, filter_ht, pad_wd, pad_ht,
                           activation_min, activation_max, channels);
        profile_opt_end();

        const bool ret = CHECK_EQUAL(output_c, output_opt, out_wd * out_ht * channels);
        if (ret) {
            printf(ANSI_COLOR_GREEN"%s passed\n"ANSI_COLOR_RESET, __FUNCTION__);
        } else {
            printf(ANSI_COLOR_RED"%s failed\n"ANSI_COLOR_RESET, __FUNCTION__);
            printf("Output: \n");
            PRINT_ARRAY_HEX(output_opt, out_wd * out_ht * channels, 1);
            printf("Expected: \n");
            PRINT_ARRAY_HEX(output_c, out_wd * out_ht * channels, 1);
            printf("Input:\n");
            PRINT_ARRAY_HEX(input, 8, size / 8);
        }
    }

    /* free(NULL) is a no-op, so no per-pointer guards are needed */
    free(input);
    free(output_c);
    free(output_opt);
}

View File

@@ -1,83 +0,0 @@
// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <esp_nn.h>
#include "test_utils.h"
/**
 * Check the optimized s8 ReLU6 kernel against the ANSI C reference.
 *
 * Both kernels are called with a buffer and a length only, and their
 * results are read back from that same buffer. The test therefore keeps
 * three copies of the random data: the untouched input (for the failure
 * dump) and one working copy per implementation. Each run is wrapped in
 * the profile_* hooks.
 */
void esp_nn_relu6_s8_test()
{
    /* NOTE(review): odd length, presumably to cover unaligned/left-over
     * elements in the optimized kernel - confirm. */
    const int size = 1600 + 8 + 7;
    int8_t *input, *inout_ansi, *inout_opt;

    input = memalign(16, size);
    inout_ansi = memalign(16, size);
    inout_opt = memalign(16, size);

    if (input == NULL || inout_ansi == NULL || inout_opt == NULL) {
        printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
        goto relu6_s8_cleanup;
    }

    /* Generate input data between -128 -> +127.
     * The modulus must be 256 for +127 to be reachable. */
    for (int i = 0; i < size; ++i) {
        input[i] = rand() % 256 - 128;
        inout_ansi[i] = input[i];
        inout_opt[i] = input[i];
    }

    /* enable profiler */
    profile_c_start();

    /* C function */
    esp_nn_relu6_s8_ansi(inout_ansi, size);

    profile_c_end();
    profile_opt_start();

    /* Optimized function */
    esp_nn_relu6_s8(inout_opt, size);

    /* disable profiler */
    profile_opt_end();

    bool ret = CHECK_EQUAL(inout_ansi, inout_opt, size);
    if (ret == false) {
        printf(ANSI_COLOR_RED"%s failed\n"ANSI_COLOR_RESET, __FUNCTION__);
        printf("Output: \n");
        PRINT_ARRAY_HEX(inout_opt, size, 1);
        printf("Expected: \n");
        PRINT_ARRAY_HEX(inout_ansi, size, 1);
        printf("Input:\n");
        PRINT_ARRAY_HEX(input, size, 1);
        goto relu6_s8_cleanup;
    }
    printf(ANSI_COLOR_GREEN"%s passed\n"ANSI_COLOR_RESET, __FUNCTION__);

relu6_s8_cleanup:
    /* free(NULL) is a no-op, so partially failed allocations are fine here */
    free(input);
    free(inout_ansi);
    free(inout_opt);
}

View File

@@ -1,101 +0,0 @@
// Copyright 2022 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <esp_nn.h>
#include "test_utils.h"
/**
 * Check the optimized s8 softmax kernel against the ANSI C reference.
 *
 * Uses an 8 (height) x 32 (width) input filled with random values in
 * -128..+127. The reference run is profiled first. A scratch buffer is then
 * allocated and registered if esp_nn_get_softmax_scratch_size() reports a
 * positive size, and the optimized run is profiled. On a mismatch the
 * output, expected and input buffers are dumped in hex.
 */
void esp_nn_softmax_s8_test()
{
    const int32_t height = 8;
    const int32_t width = 32;
    const int32_t diff_min = -128;
    const int32_t mult = INT32_MAX / 2;
    const int32_t shift = 7;
    void *scratch_buf = NULL;
    const int size = width * height;
    int8_t *input, *out_ansi, *out_opt;

    input = memalign(16, size);
    out_ansi = memalign(16, size);
    out_opt = memalign(16, size);

    if (input == NULL || out_ansi == NULL || out_opt == NULL) {
        printf(ANSI_COLOR_RED"%s buffer allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__);
        goto softmax_s8_cleanup;
    }

    /* Generate input data between -128 -> +127.
     * The modulus must be 256 for +127 to be reachable. */
    for (int i = 0; i < size; ++i) {
        input[i] = rand() % 256 - 128;
    }

    /* enable profiler */
    profile_c_start();

    /* C function */
    esp_nn_softmax_s8_ansi(input, height, width, mult, shift, diff_min, out_ansi);

    profile_c_end();

    /* Scratch setup happens between the two profiled regions.
     * Only a strictly positive size is treated as a request for a buffer,
     * so a negative return value is never handed to memalign(). */
    int32_t scratch_buf_size = esp_nn_get_softmax_scratch_size(width, height);
    if (scratch_buf_size > 0) {
        scratch_buf = memalign(4, scratch_buf_size);
        if (scratch_buf == NULL) {
            /* cast keeps the argument matched to %d even where int32_t
             * is not a plain int */
            printf(ANSI_COLOR_RED"%s scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET,
                   __FUNCTION__, (int) scratch_buf_size);
            goto softmax_s8_cleanup;
        }
        esp_nn_set_softmax_scratch_buf(scratch_buf);
    }

    profile_opt_start();

    /* Optimized function */
    esp_nn_softmax_s8(input, height, width, mult, shift, diff_min, out_opt);

    /* disable profiler */
    profile_opt_end();

    bool ret = CHECK_EQUAL(out_ansi, out_opt, size);
    if (ret == false) {
        printf(ANSI_COLOR_RED"%s failed\n"ANSI_COLOR_RESET, __FUNCTION__);
        printf("Output: \n");
        PRINT_ARRAY_HEX(out_opt, width, height);
        printf("Expected: \n");
        PRINT_ARRAY_HEX(out_ansi, width, height);
        printf("Input:\n");
        PRINT_ARRAY_HEX(input, width, height);
        goto softmax_s8_cleanup;
    }
    printf(ANSI_COLOR_GREEN"%s passed\n"ANSI_COLOR_RESET, __FUNCTION__);

softmax_s8_cleanup:
    /* free(NULL) is a no-op, so partially failed allocations are fine here */
    free(input);
    free(out_ansi);
    free(out_opt);
    free(scratch_buf);
}