From b06b42f0e9f07d47f0f4c417d6f7dd13816fc9ea Mon Sep 17 00:00:00 2001 From: CaCO3 Date: Fri, 23 Sep 2022 21:17:44 +0200 Subject: [PATCH] removed esp-nn --- code/components/esp-nn/.gitignore | 57 -- code/components/esp-nn/.gitlab-ci.yml | 55 -- code/components/esp-nn/CMakeLists.txt | 50 -- code/components/esp-nn/Kconfig.projbuild | 29 - code/components/esp-nn/LICENSE | 202 ------ code/components/esp-nn/README.md | 55 -- code/components/esp-nn/include/esp_nn.h | 46 -- .../components/esp-nn/include/esp_nn_ansi_c.h | 47 -- .../esp-nn/include/esp_nn_ansi_headers.h | 309 --------- code/components/esp-nn/include/esp_nn_defs.h | 83 --- .../esp-nn/include/esp_nn_esp32s3.h | 231 ------- .../esp-nn/include/esp_nn_generic_opt.h | 47 -- .../activation_functions/esp_nn_relu_ansi.c | 30 - .../esp-nn/src/basic_math/esp_nn_add_ansi.c | 97 --- .../esp-nn/src/basic_math/esp_nn_mul_ansi.c | 42 -- .../esp-nn/src/common/common_functions.h | 255 -------- .../esp-nn/src/convolution/esp_nn_conv_ansi.c | 179 ------ .../src/convolution/esp_nn_conv_esp32s3.c | 463 -------------- .../esp-nn/src/convolution/esp_nn_conv_opt.c | 179 ------ .../convolution/esp_nn_depthwise_conv_ansi.c | 100 --- .../convolution/esp_nn_depthwise_conv_opt.c | 291 --------- .../esp_nn_depthwise_conv_s8_esp32s3.c | 543 ---------------- .../esp_nn_fully_connected_ansi.c | 50 -- .../esp-nn/src/pooling/esp_nn_avg_pool_ansi.c | 72 --- .../esp-nn/src/pooling/esp_nn_max_pool_ansi.c | 66 -- .../esp-nn/src/softmax/esp_nn_softmax_ansi.c | 88 --- .../esp-nn/src/softmax/esp_nn_softmax_opt.c | 108 ---- .../esp-nn/src/softmax/softmax_common.h | 104 --- .../components/esp-nn/test_app/CMakeLists.txt | 9 - .../esp-nn/test_app/main/CMakeLists.txt | 7 - .../esp-nn/test_app/main/component.mk | 8 - code/components/esp-nn/test_app/main/main.c | 87 --- .../esp-nn/test_app/sdkconfig.defaults | 5 - .../test_app/sdkconfig.defaults.esp32s3 | 8 - code/components/esp-nn/tests/CMakeLists.txt | 15 - code/components/esp-nn/tests/README.md | 4 - code/components/esp-nn/tests/component.mk | 5 - .../esp-nn/tests/include/test_functions.h | 48 -- .../esp-nn/tests/include/test_utils.h | 87 --- .../esp-nn/tests/src/basic_math_test.c | 355 ---------- .../esp-nn/tests/src/convolution_test.c | 605 ------------------ .../esp-nn/tests/src/fully_connected_test.c | 111 ---- .../esp-nn/tests/src/pooling_test.c | 184 ------ code/components/esp-nn/tests/src/relu_test.c | 83 --- .../esp-nn/tests/src/softmax_test.c | 101 --- 45 files changed, 5600 deletions(-) delete mode 100644 code/components/esp-nn/.gitignore delete mode 100644 code/components/esp-nn/.gitlab-ci.yml delete mode 100644 code/components/esp-nn/CMakeLists.txt delete mode 100644 code/components/esp-nn/Kconfig.projbuild delete mode 100644 code/components/esp-nn/LICENSE delete mode 100644 code/components/esp-nn/README.md delete mode 100644 code/components/esp-nn/include/esp_nn.h delete mode 100644 code/components/esp-nn/include/esp_nn_ansi_c.h delete mode 100644 code/components/esp-nn/include/esp_nn_ansi_headers.h delete mode 100644 code/components/esp-nn/include/esp_nn_defs.h delete mode 100644 code/components/esp-nn/include/esp_nn_esp32s3.h delete mode 100644 code/components/esp-nn/include/esp_nn_generic_opt.h delete mode 100644 code/components/esp-nn/src/activation_functions/esp_nn_relu_ansi.c delete mode 100644 code/components/esp-nn/src/basic_math/esp_nn_add_ansi.c delete mode 100644 code/components/esp-nn/src/basic_math/esp_nn_mul_ansi.c delete mode 100644 code/components/esp-nn/src/common/common_functions.h delete mode 100644 code/components/esp-nn/src/convolution/esp_nn_conv_ansi.c delete mode 100644 code/components/esp-nn/src/convolution/esp_nn_conv_esp32s3.c delete mode 100644 code/components/esp-nn/src/convolution/esp_nn_conv_opt.c delete mode 100644 code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_ansi.c delete mode 100644 code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_opt.c delete mode 100644 code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c delete mode 100644 code/components/esp-nn/src/fully_connected/esp_nn_fully_connected_ansi.c delete mode 100644 code/components/esp-nn/src/pooling/esp_nn_avg_pool_ansi.c delete mode 100644 code/components/esp-nn/src/pooling/esp_nn_max_pool_ansi.c delete mode 100644 code/components/esp-nn/src/softmax/esp_nn_softmax_ansi.c delete mode 100644 code/components/esp-nn/src/softmax/esp_nn_softmax_opt.c delete mode 100644 code/components/esp-nn/src/softmax/softmax_common.h delete mode 100644 code/components/esp-nn/test_app/CMakeLists.txt delete mode 100644 code/components/esp-nn/test_app/main/CMakeLists.txt delete mode 100644 code/components/esp-nn/test_app/main/component.mk delete mode 100644 code/components/esp-nn/test_app/main/main.c delete mode 100644 code/components/esp-nn/test_app/sdkconfig.defaults delete mode 100644 code/components/esp-nn/test_app/sdkconfig.defaults.esp32s3 delete mode 100644 code/components/esp-nn/tests/CMakeLists.txt delete mode 100644 code/components/esp-nn/tests/README.md delete mode 100644 code/components/esp-nn/tests/component.mk delete mode 100644 code/components/esp-nn/tests/include/test_functions.h delete mode 100644 code/components/esp-nn/tests/include/test_utils.h delete mode 100644 code/components/esp-nn/tests/src/basic_math_test.c delete mode 100644 code/components/esp-nn/tests/src/convolution_test.c delete mode 100644 code/components/esp-nn/tests/src/fully_connected_test.c delete mode 100644 code/components/esp-nn/tests/src/pooling_test.c delete mode 100644 code/components/esp-nn/tests/src/relu_test.c delete mode 100644 code/components/esp-nn/tests/src/softmax_test.c diff --git a/code/components/esp-nn/.gitignore b/code/components/esp-nn/.gitignore deleted file mode 100644 index 08ca72b5..00000000 --- a/code/components/esp-nn/.gitignore +++ /dev/null @@ -1,57 +0,0 @@ -.config -*.o -*.i -*.s -*.orig -*.pyc - -# gtags -GTAGS -GRTAGS -GPATH - -# emacs -.dir-locals.el - -# emacs temp file suffixes -*~ -.#* -\#*# - -# eclipse setting -.settings - -# MacOS directory files -.DS_Store - -# Example project files -examples/**/sdkconfig -examples/**/sdkconfig.old -examples/**/build - -# Test app files -test_app/build -test_app/sdkconfig -test_app/sdkconfig.old - -# Doc build artifacts -docs/_build/ -docs/doxygen-warning-log.txt -docs/sphinx-warning-log.txt -docs/sphinx-warning-log-sanitized.txt -docs/xml/ -docs/xml_in/ -docs/man/ -docs/doxygen_sqlite3.db - -TEST_LOGS - - -# gcov coverage reports -*.gcda -*.gcno -coverage.info -coverage_report/ - -# VS Code Settings -.vscode/ diff --git a/code/components/esp-nn/.gitlab-ci.yml b/code/components/esp-nn/.gitlab-ci.yml deleted file mode 100644 index 6b540bda..00000000 --- a/code/components/esp-nn/.gitlab-ci.yml +++ /dev/null @@ -1,55 +0,0 @@ -stages: - - build - -variables: - BATCH_BUILD: "1" - V: "0" - MAKEFLAGS: "-j8 --no-keep-going" - IDF_PATH: "$CI_PROJECT_DIR/esp-idf" - LOG_PATH: "$CI_PROJECT_DIR" - -.set_git_config: &set_git_config - # Set git config - - git config user.email "test@espressif.com" - - git config user.name "Espressif" - -.add_ssh_key: &add_ssh_key - # Add gitlab ssh key - - mkdir -p ~/.ssh - - chmod 700 ~/.ssh - - echo -n $GITLAB_KEY > ~/.ssh/id_rsa_base64 - - base64 --decode --ignore-garbage ~/.ssh/id_rsa_base64 > ~/.ssh/id_rsa - - chmod 600 ~/.ssh/id_rsa - - echo -e "Host gitlab.espressif.cn\n\tStrictHostKeyChecking no\n" >> ~/.ssh/config - -before_script: - # Add gitlab ssh key - - *add_ssh_key - # Set git config - - *set_git_config - -.build_esp32s3: &build_esp32s3 - - idf.py set-target esp32s3 build - -.build_esp32: &build_esp32 - - idf.py set-target esp32 build - -build_demo: - stage: build - image: $CI_DOCKER_REGISTRY/esp32-ci-env:esp-nn - tags: - - build - script: - # Clone IDF - - git clone --recursive --single-branch -b release/v4.4 --reference-if-able /local_references/gitlab/ https://gitlab-ci-token:${BOT_TOKEN}@gitlab.espressif.cn:6688/espressif/esp-idf.git - - cd esp-idf - - ./install.sh - - . ./export.sh - - cd .. - # Build examples now - - cd test_app - # Build esp32s3 - - *build_esp32s3 - # Build esp32 - - *build_esp32 - - cd - diff --git a/code/components/esp-nn/CMakeLists.txt b/code/components/esp-nn/CMakeLists.txt deleted file mode 100644 index ba45866a..00000000 --- a/code/components/esp-nn/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -idf_build_get_property(idf_target IDF_TARGET) - -set(c_srcs - "src/activation_functions/esp_nn_relu_ansi.c" - "src/basic_math/esp_nn_add_ansi.c" - "src/basic_math/esp_nn_mul_ansi.c" - "src/convolution/esp_nn_conv_ansi.c" - "src/convolution/esp_nn_conv_opt.c" - "src/convolution/esp_nn_depthwise_conv_ansi.c" - "src/convolution/esp_nn_depthwise_conv_opt.c" - "src/fully_connected/esp_nn_fully_connected_ansi.c" - "src/softmax/esp_nn_softmax_ansi.c" - "src/softmax/esp_nn_softmax_opt.c" - "src/pooling/esp_nn_avg_pool_ansi.c" - "src/pooling/esp_nn_max_pool_ansi.c") - -if(CONFIG_IDF_TARGET_ESP32S3) - set(s3_srcs - "src/common/esp_nn_common_functions_esp32s3.S" - "src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S" - "src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S" - "src/activation_functions/esp_nn_relu_s8_esp32s3.S" - "src/basic_math/esp_nn_add_s8_esp32s3.S" - "src/basic_math/esp_nn_mul_s8_esp32s3.S" - "src/convolution/esp_nn_conv_esp32s3.c" - "src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c" - "src/convolution/esp_nn_conv_s16_mult8_esp32s3.S" - "src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S" - "src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S" - "src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S" - "src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S" - "src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S" - "src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S" - "src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S" - "src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S" - "src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S" - "src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S" - "src/pooling/esp_nn_max_pool_s8_esp32s3.S" - "src/pooling/esp_nn_avg_pool_s8_esp32s3.S") -endif() - -idf_component_register(SRCS "${c_srcs}" - "${s3_srcs}" - INCLUDE_DIRS "include" "src/common") - -if(CONFIG_IDF_TARGET_ESP32S3) - target_compile_options(${COMPONENT_LIB} PRIVATE -mlongcalls -fno-unroll-loops -O2 -Wno-unused-function) -else() - target_compile_options(${COMPONENT_LIB} PRIVATE -Wno-unused-function) -endif() \ No newline at end of file diff --git a/code/components/esp-nn/Kconfig.projbuild b/code/components/esp-nn/Kconfig.projbuild deleted file mode 100644 index a146305b..00000000 --- a/code/components/esp-nn/Kconfig.projbuild +++ /dev/null @@ -1,29 +0,0 @@ -menu "ESP-NN" - -choice NN_OPTIMIZATIONS - bool "Optimization for nn functions" - default NN_OPTIMIZED - help - Use ANSI-C versions for verification and debug purpose. - Optimisations are automatically picked up for a chipset. - For ESP32-S3, assembly optimisations are selected. - For other platforms(viz., ESP32, ESP32-C3), generic optimisations are used. - -config NN_ANSI_C - bool "ANSI C" - help - ANSI C versions for verification and debug purposes. -config NN_OPTIMIZED - bool "Optimized versions" - help - Optimisations are automatically picked up for a chipset. - For ESP32-S3, assembly optimisations are selected. - For other platforms(viz., ESP32, ESP32-C3), generic optimisations are used. -endchoice - -config NN_OPTIMIZATIONS - int - default 0 if NN_ANSI_C - default 1 if NN_OPTIMIZED - -endmenu diff --git a/code/components/esp-nn/LICENSE b/code/components/esp-nn/LICENSE deleted file mode 100644 index d6456956..00000000 --- a/code/components/esp-nn/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/code/components/esp-nn/README.md b/code/components/esp-nn/README.md deleted file mode 100644 index f70f4074..00000000 --- a/code/components/esp-nn/README.md +++ /dev/null @@ -1,55 +0,0 @@ -# ESP-NN - -The library contains optimised NN (Neural Network) functions for various Espressif chipsets. - -* Supported platforms: - * TensorFlow Lite Micro (TFLite Micro). Repo can be found [here](https://github.com/espressif/tflite-micro-esp-examples) - -* Supported ESP chipsets include: - * ESP32-S3 (Assembly versions optimised to benefit from vector instructions of ESP32-S3) - * ESP32 (Generic optimisations) - * ESP32-C3 (Generic optimisations) - -## Performance - -### Kernelwise performance for s8 versions: - - * Kernelwise performance on ESP32-S3 chip - * Numbers are ticks taken for kernel to execute - * Chip config: 240MHz, SPI: QPI 80MHz, Data cache: 64KB - - | Function | ANSI C | ESP32-S3 Opt | Opt Ratio | Data info | Memory | - | ----------------| --------|---------|---------|-------------|-----------| - | elementwise_add | 320397 | 87119 | 3.68 | size = 1615 | External | - | elementwise_mul | 125958 | 44239 | 2.85 | size = 1615 | External | - | convolution | 4663012 | 428675 | 10.88 | input(10,10), filter(64x1x1x64) | External | - | convolution | 301014 | 32433 | 9.28 | input(8,8), filter(16x1x1x16) | External | - | convolution | 2115418 | 1020923 | 2.07 | input(10,10), filter(64x3x3x3) | External | - | depthwise conv | 1190062 | 203278 | 5.85 | input (18, 18), pad(0,0), stride(1,1) filter: 1x3x3x16 | External | - | depthwise conv | 837072 | 182335 | 4.59 | input (12, 12), pad(1,1), stride(1,1) filter: 8x5x5x4 | External | - | max pool | 485714 | 76747 | 6.33 | input(16,16), filter (1x3x3x16) | Internal | - | avg pool | 541462 | 160580 | 3.37 | input(16,16), filter (1x3x3x16) | Internal | - | fully connected | 15853 | 9547 | 1.66 | len: 265, ch = 3 | Internal | - | prelu (relu6) | 19472 | 2734 | 7.12 | size, 1615 | Internal | - - -## Configuration - - * To configure, please use `idf.py menuconfig` and under `ESP-NN` select `NN_OPTIMIZATIONS` - * There are two options presented: - * Optimized versions - * ANSI C - - * Default selection is for `Optimized versions`. For ESP32-S3, assembly versions are automatically selected, whereas for other chipsets (viz., ESP32, ESP32-C3), generic optimisations are selected. - * For debugging purposes, you may want to select `ANSI C` reference versions. - - -## Contributing - -If you encounter an issue with ESP-NN, or wish to submit a feature request, please use the Issues section on the Github. - -For general questions related to this library, please use the esp32.com forum. - -## Copyrights and License - -All original source code in this repository is Copyright (C) 2020-2021 Espressif Systems. This source code is licensed under the Apache License 2.0 as described in the file LICENSE. diff --git a/code/components/esp-nn/include/esp_nn.h b/code/components/esp-nn/include/esp_nn.h deleted file mode 100644 index bd533119..00000000 --- a/code/components/esp-nn/include/esp_nn.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#if defined(CONFIG_NN_OPTIMIZED) -// select apt optimisations -#ifdef CONFIG_IDF_TARGET_ESP32S3 -#define ARCH_ESP32_S3 1 -#endif -#ifdef CONFIG_IDF_TARGET_ESP32 -#define ARCH_ESP32 1 -#endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* reference kernels included by default */ -#include "esp_nn_ansi_headers.h" - -#if defined(CONFIG_NN_OPTIMIZED) -#if defined(ARCH_ESP32_S3) -#include "esp_nn_esp32s3.h" -#else // for other platforms use generic optimisations -#include "esp_nn_generic_opt.h" -#endif // #if defined(ARCH_ESP32_S3) -#else -#include "esp_nn_ansi_c.h" -#endif - -#ifdef __cplusplus -} -#endif diff --git a/code/components/esp-nn/include/esp_nn_ansi_c.h b/code/components/esp-nn/include/esp_nn_ansi_c.h deleted file mode 100644 index 8279ebef..00000000 --- a/code/components/esp-nn/include/esp_nn_ansi_c.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/** - * @file Header definitions to include for ANSI C versions. - * These are just typedefs to pick up ANSI versions. - */ - -#pragma once - -#include "esp_nn_defs.h" -#include "esp_nn_ansi_headers.h" - -#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_ansi -#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_ansi - -#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_ansi - -#define esp_nn_conv_s8 esp_nn_conv_s8_ansi - -#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_ansi -#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_ansi - -#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_ansi -#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_ansi - -#define esp_nn_relu6_s8 esp_nn_relu6_s8_ansi - -#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_ansi -#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_ansi - -#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_ansi - -#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_ansi -#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_ansi -#define esp_nn_softmax_s8 esp_nn_softmax_s8_ansi diff --git a/code/components/esp-nn/include/esp_nn_ansi_headers.h b/code/components/esp-nn/include/esp_nn_ansi_headers.h deleted file mode 100644 index 52ebb680..00000000 --- a/code/components/esp-nn/include/esp_nn_ansi_headers.h +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -/** - * @file Header definitions to include for esp_nn reference functions - */ - -#include "esp_nn_defs.h" -/************************** Basic math functions ****************************/ - -/** - * @brief elementwise addition - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - * - * shift values are expected to be <= 0 - */ -void esp_nn_add_elementwise_s8_ansi(const int8_t *input1_data, - const int8_t *input2_data, - const int32_t input1_offset, - const int32_t input2_offset, - const int32_t input1_mult, - const int32_t input2_mult, - const int32_t input1_shift, - const int32_t input2_shift, - const int32_t left_shift, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t activation_min, - const int32_t activation_max, - const int32_t size); -/** - * @brief elementwise multiplication - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - * - * output shift is expected to be <= 0 - */ -void esp_nn_mul_elementwise_s8_ansi(const int8_t *input1_data, - const int8_t *input2_data, - const int32_t input1_offset, - const int32_t input2_offset, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t activation_min, - const int32_t activation_max, - const int32_t size); - - -/************************** Convolution functions *****************************/ - -/** - * @brief depthwise convolution per channel - * - * @note inputs type: int8_t, output: int8_t - * Version used in tflite is per channel. - * This version follows the same footsprints. - * Meaning, it has per out_channel shift and multiplier for - * requantization - * - * optimization notes: Though input_offset is int32 type, - * offset values are contained in 8 bits [-128, 127] - */ -void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const dw_conv_params_t *conv_params, - const quant_data_t *quant_data); - -/** - * @brief 2d-convolution channelwise - * - * @note operation: result += (input + offset) * filter - * - * inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - */ -void esp_nn_conv_s8_ansi(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const conv_params_t *conv_params, - const quant_data_t *quant_data); - -int esp_nn_get_conv_scratch_size_ansi(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const conv_params_t *conv_params); -void esp_nn_set_conv_scratch_buf_ansi(const void *buf); - -int esp_nn_get_depthwise_conv_scratch_size_ansi(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const dw_conv_params_t *conv_params); -void esp_nn_set_depthwise_conv_scratch_buf_ansi(const void *buf); - -/************************** Activation functions *****************************/ - -/** - * @brief relu6 - * - * @note inout: int8_t - */ -void esp_nn_relu6_s8_ansi(int8_t *data, uint16_t size); - -/************************** Pooling functions *****************************/ - - -/** - * @brief max_pool - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - */ -void esp_nn_max_pool_s8_ansi(const int8_t *input, - const uint16_t input_wd, - const uint16_t input_ht, - int8_t *output, - const uint16_t output_wd, - const uint16_t output_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t filter_wd, - const uint16_t filter_ht, - const uint16_t pad_wd, - const uint16_t pad_ht, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t channels); - -/** - * @brief avg_pool - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - */ -void esp_nn_avg_pool_s8_ansi(const int8_t *input, - const uint16_t input_wd, - const uint16_t input_ht, - int8_t *output, - const uint16_t output_wd, - const uint16_t output_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t filter_wd, - const uint16_t filter_ht, - const uint16_t pad_wd, - const uint16_t pad_ht, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t channels); - - -/************************** Fully connected functions ***********************/ - -/** - * @brief fully connected - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - */ -void esp_nn_fully_connected_s8_ansi(const int8_t *input_data, - const int32_t input_offset, - const uint16_t row_len, - const int8_t *filter_data, - const int32_t filter_offset, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t out_shift, - const int32_t out_mult, - const int32_t activation_min, - const int32_t activation_max); - -/** - * @brief Get scratch buffer size needed by softmax function - * - * @param width - * @param height - * @return size in bytes - * - * @note buffer must be 4 byte aligned - */ -int32_t esp_nn_get_softmax_scratch_size_ansi(const int32_t width, const int32_t height); - -/* ANSI C function to be hooked up when optimised version needed */ -int32_t esp_nn_get_softmax_scratch_size_opt(const int32_t width, const int32_t height); - -/** - * @brief Set scratch buffer to be used by softmax function - * - * @param buffer this can be NULL if one needs to unset it - * must be aligned to 4 bytes - */ -void esp_nn_set_softmax_scratch_buf_ansi(void *buffer); - -/** - * @brief reference softmax function - * - * @note inputs type: int8_t, output: int8_t - */ -void esp_nn_softmax_s8_ansi(const int8_t *input_data, - const int32_t height, - const int32_t width, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int8_t *output_data); - - -//////////////////////////// Generic optimisations ///////////////////////////// - -/************************** Convolution functions *****************************/ - -/** - * @brief 2d-convolution channelwise optimized version - * - * @note operation: result += (input + offset) * filter - * - * inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - */ -void esp_nn_conv_s8_opt(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const conv_params_t *conv_params, - const quant_data_t *quant_data); - -/** - * @brief depthwise convolution per channel optimized version - * - * @note inputs type: int8_t, output: int8_t - * Version used in tflite is per channel. - * This version follows the same footsprints. - * Meaning, it has per out_channel shift and multiplier for - * requantization - * - * optimization notes: Though input_offset is int32 type, - * offset values are contained in 8 bits [-128, 127] - */ -void esp_nn_depthwise_conv_s8_opt(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const dw_conv_params_t *conv_params, - const quant_data_t *quant_data); - -int esp_nn_get_conv_scratch_size_opt(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const conv_params_t *conv_params); -void esp_nn_set_conv_scratch_buf_opt(const void *buf); - -int esp_nn_get_depthwise_conv_scratch_size_opt(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const dw_conv_params_t *conv_params); -void esp_nn_set_depthwise_conv_scratch_buf_opt(const void *buf); - -/* ANSI C function to be hooked up when optimised version needed */ -void esp_nn_set_softmax_scratch_buf_opt(void *buffer); - -/** - * @brief optimised version of softmax function - * - * @note the function uses extra buffer (4 * width bytes) - * hence, scratch buffers must be set before calling this. - */ -void esp_nn_softmax_s8_opt(const int8_t *input_data, - const int32_t height, - const int32_t width, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int8_t *output_data); diff --git a/code/components/esp-nn/include/esp_nn_defs.h b/code/components/esp-nn/include/esp_nn_defs.h deleted file mode 100644 index 756d8e6f..00000000 --- a/code/components/esp-nn/include/esp_nn_defs.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2022 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -/** - * @brief structure to club data dims - * this structure can be used for input, output and filter - */ -typedef struct data_dims { - int32_t width; - int32_t height; - int32_t channels; - - int32_t extra; // can be used as batch or any other param -} data_dims_t; - -/** - * @brief 2d data structure (width, height) - * - */ -typedef struct data_2d { - int32_t width; - int32_t height; -} data_2d_t; - -/** - * @brief min/max activation - */ -typedef struct act_params { - int32_t min; - int32_t max; -} act_params_t; - -/** - * @brief per channel quant data - * - * @note number of shift and mult elements are equal to output channels - */ -typedef struct quant_data { - int32_t *shift; - int32_t *mult; -} quant_data_t; - -/** - * @brief params specific to convolution 2d - * - */ -typedef struct conv_params { - int32_t in_offset; - int32_t out_offset; - data_2d_t stride; - data_2d_t padding; - data_2d_t dilation; - act_params_t activation; -} conv_params_t; - -/** - * @brief params specific to depthwise convolution 2d - * - */ -typedef struct dw_conv_params { - int32_t in_offset; - int32_t out_offset; - int32_t ch_mult; // channel multiplier. (in_ch * ch_mult = out_ch) - data_2d_t stride; - data_2d_t padding; - data_2d_t dilation; - act_params_t activation; -} dw_conv_params_t; diff --git a/code/components/esp-nn/include/esp_nn_esp32s3.h b/code/components/esp-nn/include/esp_nn_esp32s3.h deleted file mode 100644 index 0f52c943..00000000 --- a/code/components/esp-nn/include/esp_nn_esp32s3.h +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/** - * @file Header definitions to include for esp_nn optimized functions for - * the ESP32-S3 platform - */ - -#pragma once - -#include "esp_nn_defs.h" -#include "esp_nn_ansi_headers.h" - -/************************** Basic math functions *****************************/ - - -/** - * @brief elementwise addition - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - * - * shift values are expected to be <= 0 - */ -void esp_nn_add_elementwise_s8_esp32s3(const int8_t *input1_data, - const int8_t *input2_data, - const int32_t input1_offset, - const int32_t input2_offset, - const int32_t input1_mult, - const int32_t input2_mult, - const int32_t input1_shift, - const int32_t input2_shift, - const int32_t left_shift, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t activation_min, - const int32_t activation_max, - const int32_t size); - -/** - * @brief elementwise multiplication - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - * - * output shift is expected to be <= 0 - */ -void esp_nn_mul_elementwise_s8_esp32s3(const int8_t *input1_data, - const int8_t *input2_data, - const int32_t input1_offset, - const int32_t input2_offset, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t activation_min, - const int32_t activation_max, - const int32_t size); - - -/************************** Convolution functions *****************************/ - -/** - * @brief depthwise convolution per channel - * - * @note inputs type: int8_t, output: int8_t - * Version used in tflite is per channel. - * This version follows the same footsprints. - * Meaning, it has per out_channel shift and multiplier for - * requantization - * - * optimization notes: Though input_offset is int32 type, - * offset values are contained in 8 bits [-128, 127] - */ -void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *output_data, - const dw_conv_params_t *conv_params, - const quant_data_t *quant_data); - -/** - * @brief 2d - convolution channelwise - * - * @note operation: result += (input + offset) * filter - * - * inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - */ -void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *output_data, - const conv_params_t *conv_params, - const quant_data_t *quant_data); - -int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const conv_params_t *conv_params); -void esp_nn_set_conv_scratch_buf_esp32s3(const void *buf); - -int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const dw_conv_params_t *conv_params); -void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(const void *buf); - -/************************** Pooling functions *****************************/ - -/** - * @brief max_pool - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - */ -void esp_nn_max_pool_s8_esp32s3(const int8_t *input, - const uint16_t input_wd, - const uint16_t input_ht, - int8_t *output, - const uint16_t output_wd, - const uint16_t output_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t filter_wd, - const uint16_t filter_ht, - const uint16_t pad_wd, - const uint16_t pad_ht, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t channels); - -/** - * @brief avg_pool - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - */ -void esp_nn_avg_pool_s8_esp32s3(const int8_t *input, - const uint16_t input_wd, - const uint16_t input_ht, - int8_t *output, - const uint16_t output_wd, - const uint16_t output_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t filter_wd, - const uint16_t filter_ht, - const uint16_t pad_wd, - const uint16_t pad_ht, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t channels); - - -/************************** Fully connected functions *****************************/ - -/** - * @brief fully connected - * - * @note inputs type: int8_t, output: int8_t - * input offsets: although int32_t, they are contained in 8 bits [-128, 127] - * - * Current version works only on aligned input. - * row_len and channels should both be multiple of 8. - */ -void esp_nn_fully_connected_s8_esp32s3(const int8_t *input_data, - const int32_t input_offset, - const uint16_t row_len, - const int8_t *filter_data, - const int32_t filter_offset, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t out_shift, - const int32_t out_mult, - const int32_t activation_min, - const int32_t activation_max); - -/** - * @brief relu6 - * - * @note inout: int8_t - */ -void esp_nn_relu6_s8_esp32s3(int8_t *data, uint16_t size); - -/********************** function defines ***************************/ - -#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_esp32s3 -#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_esp32s3 - -#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_esp32s3 - -#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_esp32s3 -#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_esp32s3 - -#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_esp32s3 -#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_esp32s3 - -#define esp_nn_conv_s8 esp_nn_conv_s8_esp32s3 - -#define esp_nn_relu6_s8 esp_nn_relu6_s8_esp32s3 - -#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_esp32s3 -#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_esp32s3 - -#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_esp32s3 - -#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_opt -#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_opt -#define esp_nn_softmax_s8 esp_nn_softmax_s8_opt diff --git a/code/components/esp-nn/include/esp_nn_generic_opt.h b/code/components/esp-nn/include/esp_nn_generic_opt.h deleted file mode 100644 index 136cba5d..00000000 --- a/code/components/esp-nn/include/esp_nn_generic_opt.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/** - * @file Header definitions to include for esp_nn generic optimisations - * For functions which not having optimisations, _ansi versions are picked. - */ - -#pragma once - -#include "esp_nn_defs.h" -#include "esp_nn_ansi_headers.h" - -#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_ansi -#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_ansi - -#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_opt - -#define esp_nn_conv_s8 esp_nn_conv_s8_opt - -#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_opt -#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_opt - -#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_opt -#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_opt - -#define esp_nn_relu6_s8 esp_nn_relu6_s8_ansi - -#define esp_nn_avg_pool_s8 esp_nn_avg_pool_s8_ansi -#define esp_nn_max_pool_s8 esp_nn_max_pool_s8_ansi - -#define esp_nn_fully_connected_s8 esp_nn_fully_connected_s8_ansi - -#define esp_nn_get_softmax_scratch_size esp_nn_get_softmax_scratch_size_opt -#define esp_nn_set_softmax_scratch_buf esp_nn_set_softmax_scratch_buf_opt -#define esp_nn_softmax_s8 esp_nn_softmax_s8_opt diff --git a/code/components/esp-nn/src/activation_functions/esp_nn_relu_ansi.c b/code/components/esp-nn/src/activation_functions/esp_nn_relu_ansi.c deleted file mode 100644 index 1d4c3d11..00000000 --- a/code/components/esp-nn/src/activation_functions/esp_nn_relu_ansi.c +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include - -void esp_nn_relu6_s8_ansi(int8_t *data, uint16_t size) -{ - int32_t i; - - for (i = 0; i < size; i++) { - int32_t ip = data[i]; - - ip = max(ip, 0); - data[i] = min(ip, 6); - } -} diff --git a/code/components/esp-nn/src/basic_math/esp_nn_add_ansi.c b/code/components/esp-nn/src/basic_math/esp_nn_add_ansi.c deleted file mode 100644 index 617386cf..00000000 --- a/code/components/esp-nn/src/basic_math/esp_nn_add_ansi.c +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include - -void esp_nn_add_elementwise_u8_ansi(const uint8_t *input1_data, - const uint8_t *input2_data, - const int32_t input1_offset, - const int32_t input2_offset, - const int32_t input1_mult, - const int32_t input2_mult, - const int32_t input1_shift, - const int32_t input2_shift, - const int32_t left_shift, - uint8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t activation_min, - const int32_t activation_max, - const int32_t size) -{ - for (int i = 0; i < size; i++) { - int32_t tmp1 = input1_data[i] + input1_offset; - int32_t tmp2 = input2_data[i] + input2_offset; - - tmp1 <<= left_shift; - tmp2 <<= left_shift; - - tmp1 = esp_nn_sat_round_doubling_high_mul(tmp1, input1_mult); - tmp2 = esp_nn_sat_round_doubling_high_mul(tmp2, input2_mult); - - tmp1 = esp_nn_div_by_power_of_two(tmp1, -input1_shift); - tmp2 = esp_nn_div_by_power_of_two(tmp2, -input2_shift); - - int32_t out = tmp1 + tmp2; - out = esp_nn_sat_round_doubling_high_mul(out, out_mult); - out = esp_nn_div_by_power_of_two(out, -out_shift); - out = out + out_offset; - - out = max(activation_min, min(out, activation_max)); - output[i] = (uint8_t) out; - } -} - -void esp_nn_add_elementwise_s8_ansi(const int8_t *input1_data, - const int8_t *input2_data, - const int32_t input1_offset, - const int32_t input2_offset, - const int32_t input1_mult, - const int32_t input2_mult, - const int32_t input1_shift, - const int32_t input2_shift, - const int32_t left_shift, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t activation_min, - const int32_t activation_max, - const int32_t size) -{ - for (int i = 0; i < size; i++) { - int32_t tmp1 = input1_data[i] + input1_offset; - int32_t tmp2 = input2_data[i] + input2_offset; - - tmp1 <<= left_shift; - tmp2 <<= left_shift; - - tmp1 = esp_nn_sat_round_doubling_high_mul(tmp1, input1_mult); - tmp2 = esp_nn_sat_round_doubling_high_mul(tmp2, input2_mult); - - tmp1 = esp_nn_div_by_power_of_two(tmp1, -input1_shift); - tmp2 = esp_nn_div_by_power_of_two(tmp2, -input2_shift); - - int32_t out = tmp1 + tmp2; - out = esp_nn_sat_round_doubling_high_mul(out, out_mult); - out = esp_nn_div_by_power_of_two(out, -out_shift); - out = out + out_offset; - - out = max(activation_min, min(out, activation_max)); - output[i] = (int8_t) out; - } -} diff --git a/code/components/esp-nn/src/basic_math/esp_nn_mul_ansi.c b/code/components/esp-nn/src/basic_math/esp_nn_mul_ansi.c deleted file mode 100644 index db8e8cc0..00000000 --- a/code/components/esp-nn/src/basic_math/esp_nn_mul_ansi.c +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include - -void esp_nn_mul_elementwise_s8_ansi(const int8_t *input1_data, - const int8_t *input2_data, - const int32_t input1_offset, - const int32_t input2_offset, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t activation_min, - const int32_t activation_max, - const int32_t size) -{ - for (int i = 0; i < size; i++) { - int32_t tmp1 = input1_data[i] + input1_offset; - int32_t tmp2 = input2_data[i] + input2_offset; - - int32_t out = tmp1 * tmp2; - out = esp_nn_multiply_by_quantized_mult(out, out_mult, out_shift); - out = out + out_offset; - - out = max(activation_min, min(out, activation_max)); - output[i] = (int8_t) out; - } -} diff --git a/code/components/esp-nn/src/common/common_functions.h b/code/components/esp-nn/src/common/common_functions.h deleted file mode 100644 index 0a74eca4..00000000 --- a/code/components/esp-nn/src/common/common_functions.h +++ /dev/null @@ -1,255 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include - -/** - * c99 standard still doesn't strictly inline functions - * We need to use attribute as well to do this. - */ -#define __NN_FORCE_INLINE__ __attribute((always_inline)) static inline - -/* min/max macros */ -#ifndef max -#define max(a, b) ({ \ - __typeof__ (a) _a = (a); \ - __typeof__ (b) _b = (b); \ - _a > _b ? _a : _b; \ -}) - -#define min(a, b) ({ \ - __typeof__ (a) _a = (a); \ - __typeof__ (b) _b = (b); \ - _a < _b ? _a : _b; \ -}) -#endif - -__NN_FORCE_INLINE__ int32_t esp_nn_clz32(uint32_t in) -{ -#if CONFIG_IDF_TARGET_ARCH_XTENSA - __asm__ volatile("nsau %0, %0" : "+r" (in)); - return in; -#elif defined(__GNUC__) - return __builtin_clz(in); -#else - int32_t count = 32; - uint32_t x = in, y = in >> 16; - if (y != 0) { - count -= 16; - x = y; - } - y = x >> 8; - if (y != 0) { - count -= 8; - x = y; - } - y = x >> 4; - if (y != 0) { - count -= 4; - x = y; - } - y = x >> 2; - if (y != 0) { - count -= 2; - x = y; - } - y = x >> 1; - if (y != 0) { - return count - 2; - } - return count - x; -#endif -} - -/** - * Signed saturate a 32 bit value to 8 bits keeping output in 32 bit variable. - */ -__NN_FORCE_INLINE__ int32_t esp_nn_saturate8(int32_t in) -{ -#if CONFIG_IDF_TARGET_ARCH_XTENSA - __asm__ volatile("clamps %0, %0, 7" : "+a"(in)); - return in; -#else - return max(INT8_MIN, min(in, INT8_MAX)); -#endif -} - -__NN_FORCE_INLINE__ int32_t esp_nn_pick_sat_high32_of64(int64_t val64) -{ - int32_t sign = (int32_t) (val64 >> 63); - int32_t to_add = sign & ((1ul << 31) - 1); - return (int32_t) ((int64_t) (val64 + to_add) >> 31); -} - -__NN_FORCE_INLINE__ int32_t esp_nn_sat_round_doubling_high_mul(int32_t in0, int32_t in1) -{ - int32_t result; - int64_t in0_64 = (int64_t) in0; - bool overflow = (in0 == in1) && (in0 == (int32_t) INT32_MIN); - - /* Nudge value */ - int64_t nudge_val = 1 << 30; - if ((in0 < 0) ^ (in1 < 0)) { - nudge_val = 1 - nudge_val; - } - - /* Multiply and add nudge */ - int64_t mult = in0_64 * in1 + nudge_val; - - /* Round and pickup 32 bits */ - result = esp_nn_pick_sat_high32_of64(mult); - - return overflow ? INT32_MAX : result; -} - -/** - * fast version - * this will fail for values closer to INT32_MAX and INT32_MIN by `1 << (exponent - 1)`. - * We can afford to do this because we are at the very last stage of filter. - * Also it is pretty rare condition as our output is going to be 8 bit. - */ -__NN_FORCE_INLINE__ int32_t esp_nn_div_by_power_of_two_fast(int32_t val, int32_t exponent) -{ - int32_t to_add = (1 << (exponent - 1)) - (val < 0); - return (int32_t) ((val + to_add) >> exponent); -} - -__NN_FORCE_INLINE__ int32_t esp_nn_div_by_power_of_two(int32_t val, int32_t exponent) -{ - int32_t result; - - const int32_t mask = (1 << exponent) - 1; - const int32_t remainder = val & mask; - - result = val >> exponent; - int32_t threshold = (mask >> 1) + (result < 0); - - if (remainder > threshold) { - result += 1; - } - return result; -} - -__NN_FORCE_INLINE__ int32_t esp_nn_multiply_by_quantized_mult(int32_t x, int32_t mult, int32_t shift) -{ - int32_t left_shift = shift > 0 ? shift : 0; - int32_t right_shift = shift > 0 ? 0 : -shift; - int32_t result = esp_nn_sat_round_doubling_high_mul(x * (1 << left_shift), mult); - return esp_nn_div_by_power_of_two(result, right_shift); -} - -__NN_FORCE_INLINE__ int32_t esp_nn_multiply_by_quantized_mult_fast(int32_t x, int32_t mult, int32_t shift) -{ - int32_t left_shift = max(shift, 0); - int32_t right_shift = left_shift - shift; - - int64_t nudge_val = 1 << 30; - int64_t in0_64 = (int64_t) (x << left_shift); - - /* Multiply and add nudge */ - int64_t mult_64 = in0_64 * mult + nudge_val; - int32_t result = (int32_t) (mult_64 >> 31); - if (right_shift) { - result = esp_nn_div_by_power_of_two_fast(result, right_shift); - } - return result; -} - -static void esp_nn_aligned_s8_pad_with_value(const int8_t *src, int8_t *dst, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const int32_t pad_val, - const uint16_t pad_wd, - const uint16_t pad_ht) -{ - /* memset with pad_val */ - memset(dst, pad_val, ((input_wd + 2 * pad_wd) * (input_ht + 2 * pad_ht)) * channels); - dst += (pad_wd + input_wd + pad_wd) * channels; - - for (int i = 0; i < input_ht; i++) { - dst += pad_wd * channels; - for (int j = 0; j < input_wd * channels; j++) { - *dst++ = *src++; - } - dst += pad_wd * channels; - } -} - -static void esp_nn_aligned_s8_pad_end_with_value(const int8_t *src, int8_t *dst, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const int32_t pad_val, - const uint16_t pad_wd, - const uint16_t pad_ht) -{ - for (int i = 0; i < input_ht; i++) { - for (int j = 0; j < input_wd * channels; j++) { - *dst++ = *src++; - } - if (pad_wd) { - memset(dst, pad_val, pad_wd * channels); - dst += pad_wd * channels; - } - } - /* pad end `pad_ht` lines at end */ - if (pad_ht) { - memset(dst, pad_val, (input_wd + pad_wd) * pad_ht * channels); - } -} - -/** - * @brief convert 8 bit input data to 16 bit - * - * @param src int8_t source data - * @param dst int16_t dst data - * @param size length of data - * @param offset offset to be added to src data. Range: [-128, 127] - */ -__NN_FORCE_INLINE__ void esp_nn_s8_to_s16_with_offset(const int8_t *src, int16_t *dst, - const int size, const int32_t offset) -{ - int i = 0; - for (; i < size; i += 2) { - dst[i + 0] = src[i + 0] + offset; - dst[i + 1] = src[i + 1] + offset; - } - if(i < size) { - dst[i] = src[i] + offset; - } -} - -/** - * @brief convert 8 bit input data to 16 bit - * - * @param src int8_t source data - * @param dst int16_t dst data - * @param size length of data - */ -__NN_FORCE_INLINE__ void esp_nn_s8_to_s16(const int8_t *src, int16_t *dst, const int size) -{ - int i = 0; - for (; i < size; i += 2) { - dst[i + 0] = src[i + 0]; - dst[i + 1] = src[i + 1]; - } - if(i < size) { - dst[i] = src[i]; - } -} diff --git a/code/components/esp-nn/src/convolution/esp_nn_conv_ansi.c b/code/components/esp-nn/src/convolution/esp_nn_conv_ansi.c deleted file mode 100644 index 677c0ad8..00000000 --- a/code/components/esp-nn/src/convolution/esp_nn_conv_ansi.c +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include - -int esp_nn_get_conv_scratch_size_ansi(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const conv_params_t *conv_params) -{ - return 0; -} - -void esp_nn_set_conv_scratch_buf_ansi(const void *buf) -{ - -} - -/** - * Assumption 1: i/p channels == o/p channels - * Assumption 2: Pointers are valid - * Assumption 3: dialation width = 1 - */ -void esp_nn_conv_u8_ansi(const uint8_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t in_channels, - const int32_t input_offset, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint8_t *filter_data, - const uint16_t filter_wd, - const uint16_t filter_ht, - const int32_t filter_offset, - const int32_t *bias, - uint8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t out_shift, - const int32_t out_mult, - const int32_t activation_min, - const int32_t activation_max) -{ - for (int out_y = 0; out_y < out_ht; out_y++) { //height loop - const int16_t base_y = (out_y * stride_ht) - pad_ht; - for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop - const int16_t base_x = (out_x * stride_wd) - pad_wd; - for (int out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {//channel_loop - int32_t result = 0; - - /* Select filter so as the point doesn't lie outside block */ - int filter_y_start = max(0, -base_y); - int filter_x_start = max(0, -base_x); - int filter_y_end = min(filter_ht, input_ht - base_y); - int filter_x_end = min(filter_wd, input_wd - base_x); - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - for (int in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) { - int32_t input_index = (idx_y * input_wd + idx_x) * in_channels + in_ch_idx; - int32_t filter_index = ((out_ch_idx * filter_ht + filter_y_idx) - * filter_wd + filter_x_idx) * in_channels - + in_ch_idx; - int32_t input_val = input_data[input_index] + input_offset; - int32_t filter_val = filter_data[filter_index] + filter_offset; - result += input_val * filter_val; - } - } - } - if (bias) { - result += bias[out_ch_idx]; - } - result = esp_nn_multiply_by_quantized_mult(result, out_mult, out_shift); - result += out_offset; - result = max(result, activation_min); - result = min(result, activation_max); - - int out_index = (out_y * out_wd + out_x) * out_channels + out_ch_idx; - out_data[out_index] = (uint8_t) result; - } - } - } -} - -/** - * Assumption 1: i/p channels == o/p channels - * Assumption 2: Pointers are valid - * Assumption 3: dialation width = 1 - */ -void esp_nn_conv_s8_ansi(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t in_channels = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const uint16_t out_channels = output_dims->channels; - const int32_t *out_shift = quant_data->shift; - const int32_t *out_mult = quant_data->mult; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - - int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx; - - for (out_y = 0; out_y < out_ht; out_y++) { - for (out_x = 0; out_x < out_wd; out_x++) { - for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { - int32_t conv_out = 0; - - const int32_t base_y = stride_ht * out_y - pad_ht; - const int32_t base_x = stride_wd * out_x - pad_wd; - - const int32_t filter_y_start = max(0, -base_y); - const int32_t filter_x_start = max(0, -base_x); - - const int32_t filter_y_end = min(filter_ht, input_ht - base_y); - const int32_t filter_x_end = min(filter_wd, input_wd - base_x); - - for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t in_row = base_y + filter_y_idx; - const int32_t in_col = base_x + filter_x_idx; - int32_t input_base_offset = (in_row * input_wd + in_col) * in_channels; - int32_t filter_base_offset = out_ch_idx * in_channels * filter_ht * filter_wd + - (filter_y_idx * filter_wd + filter_x_idx) * in_channels; - for (in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) { - conv_out += - (input_data[input_base_offset + in_ch_idx] + input_offset) * - filter_data[filter_base_offset + in_ch_idx]; - } - } - } - if (bias) { - conv_out += bias[out_ch_idx]; - } - conv_out = esp_nn_multiply_by_quantized_mult(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]); - conv_out += out_offset; - conv_out = max(conv_out, activation_min); - conv_out = min(conv_out, activation_max); - *out_data++ = (int8_t) conv_out; - } - } - } -} diff --git a/code/components/esp-nn/src/convolution/esp_nn_conv_esp32s3.c b/code/components/esp-nn/src/convolution/esp_nn_conv_esp32s3.c deleted file mode 100644 index e13129b2..00000000 --- a/code/components/esp-nn/src/convolution/esp_nn_conv_esp32s3.c +++ /dev/null @@ -1,463 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include - -static int16_t *scratch_buffer = NULL; - -extern void esp_nn_conv_s8_mult8_1x1_esp32s3(const int8_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t in_channels, - const int32_t input_offset, - const int8_t *filter_aligned, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max, - void *buffer /* scratch buffer */); - -extern void esp_nn_conv_s16_mult4_1x1_esp32s3(const int16_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t in_channels, - const int16_t *filter_data, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max, - void *buffer /* scratch buffer */); - -extern void esp_nn_conv_s16_mult8_esp32s3(const int16_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t in_channels, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int16_t *filter_data, - const uint16_t filter_wd, - const uint16_t filter_ht, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max); - -extern void esp_nn_aligned_s8_to_s16_with_offset_esp32s3(const int8_t *src, int16_t *dst, - const int size, const int32_t offset); - -extern void esp_nn_s8_to_s16_esp32s3(const int8_t *src, int16_t *dst, const int size); - -static void esp_nn_conv_s8_unrolled(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t in_ch = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const uint16_t out_ch = output_dims->channels; - const int32_t *out_shift = quant_data->shift; - const int32_t *out_mult = quant_data->mult; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - - int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx; - - for (out_y = 0; out_y < out_ht; out_y++) { - for (out_x = 0; out_x < out_wd; out_x++) { - for (out_ch_idx = 0; out_ch_idx < out_ch; out_ch_idx++) { - int32_t conv_out = 0; - - const int32_t base_y = stride_ht * out_y - pad_ht; - const int32_t base_x = stride_wd * out_x - pad_wd; - - const int32_t filter_y_start = max(0, -base_y); - const int32_t filter_x_start = max(0, -base_x); - - const int32_t filter_y_end = min(filter_ht, input_ht - base_y); - const int32_t filter_x_end = min(filter_wd, input_wd - base_x); - - for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t in_row = base_y + filter_y_idx; - const int32_t in_col = base_x + filter_x_idx; - int32_t input_base_offset = (in_row * input_wd + in_col) * in_ch; - int32_t filter_base_offset = out_ch_idx * in_ch * filter_ht * filter_wd + - (filter_y_idx * filter_wd + filter_x_idx) * in_ch; - for (in_ch_idx = 0; in_ch_idx < in_ch; in_ch_idx++) { - conv_out += - (input_data[input_base_offset + in_ch_idx] + input_offset) * - filter_data[filter_base_offset + in_ch_idx]; - } - } - } - if (bias) { - conv_out += bias[out_ch_idx]; - } - conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]); - conv_out += out_offset; - conv_out = max(conv_out, activation_min); - conv_out = min(conv_out, activation_max); - *out_data++ = (int8_t) conv_out; - } - } - } -} - -static void esp_nn_conv_s8_pad_valid(const int8_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t in_channels, - const int32_t input_offset, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int8_t *filter_data, - const uint16_t filter_wd, - const uint16_t filter_ht, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max) -{ - int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx; - - for (out_y = 0; out_y < out_ht; out_y++) { - for (out_x = 0; out_x < out_wd; out_x++) { - for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { - int32_t conv_out = 0; - - const int32_t base_y = stride_ht * out_y; - const int32_t base_x = stride_wd * out_x; - - for (filter_y_idx = 0; filter_y_idx < filter_ht; filter_y_idx++) { - for (filter_x_idx = 0; filter_x_idx < filter_wd; filter_x_idx++) { - const int32_t in_row = base_y + filter_y_idx; - const int32_t in_col = base_x + filter_x_idx; - int32_t input_base_offset = (in_row * input_wd + in_col) * in_channels; - int32_t filter_base_offset = out_ch_idx * in_channels * filter_ht * filter_wd + - (filter_y_idx * filter_wd + filter_x_idx) * in_channels; - const int8_t *input_data_ptr = input_data + input_base_offset; - const int8_t *filter_data_ptr = filter_data + filter_base_offset; - for (in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) { - conv_out += (*input_data_ptr++ + input_offset) * *filter_data_ptr++; - } - } - } - if (bias) { - conv_out += bias[out_ch_idx]; - } - conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]); - conv_out += out_offset; - conv_out = max(conv_out, activation_min); - conv_out = min(conv_out, activation_max); - *out_data++ = (int8_t) conv_out; - } - } - } -} - -static void esp_nn_conv_s8_pad_valid_3x3(const int8_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t in_channels, - const int32_t input_offset, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int8_t *filter_data, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max) -{ - int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx; - - for (out_y = 0; out_y < out_ht; out_y++) { - for (out_x = 0; out_x < out_wd; out_x++) { - const int32_t base_y = stride_ht * out_y; - const int32_t base_x = stride_wd * out_x; - for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { - int32_t conv_out = 0; - for (filter_y_idx = 0; filter_y_idx < 3; filter_y_idx++) { - for (filter_x_idx = 0; filter_x_idx < 3; filter_x_idx++) { - const int32_t in_row = base_y + filter_y_idx; - const int32_t in_col = base_x + filter_x_idx; - int32_t input_base_offset = (in_row * input_wd + in_col) * in_channels; - int32_t filter_base_offset = out_ch_idx * in_channels * 3 * 3 + - (filter_y_idx * 3 + filter_x_idx) * in_channels; - const int8_t *input_data_ptr = input_data + input_base_offset; - const int8_t *filter_data_ptr = filter_data + filter_base_offset; - for (in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) { - conv_out += (*input_data_ptr++ + input_offset) * *filter_data_ptr++; - } - } - } - if (bias) { - conv_out += bias[out_ch_idx]; - } - conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]); - conv_out += out_offset; - conv_out = max(conv_out, activation_min); - conv_out = min(conv_out, activation_max); - *out_data++ = (int8_t) conv_out; - } - } - } -} - -static void esp_nn_conv_s8_pad_valid_ch3_3x3(const int8_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const int32_t input_offset, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int8_t *filter_data, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max) -{ - int32_t out_ch_idx, out_y, out_x, filter_y_idx; - - /* use scratch_buffer to pre-compute offset factor */ - int16_t *filter_sum = (int16_t *) scratch_buffer; - const int8_t *filter_ptr = filter_data; - for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { - int16_t sum_val = 0; - for (int i = 0; i < 9; i++) { - sum_val += *filter_ptr++; - sum_val += *filter_ptr++; - sum_val += *filter_ptr++; - } - *filter_sum++ = sum_val; - } - - for (out_y = 0; out_y < out_ht; out_y++) { - for (out_x = 0; out_x < out_wd; out_x++) { - const int8_t *filter_data_ptr = filter_data; - const int32_t base_y = stride_ht * out_y; - const int32_t base_x = stride_wd * out_x; - const int8_t *input_base_ptr = input_data + (base_y * input_wd + base_x) * 3; - int16_t *filter_sum = (int16_t *) scratch_buffer; - for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { - int32_t conv_out = 0; - - for (filter_y_idx = 0; filter_y_idx < 3; filter_y_idx++) { - const int8_t *input_data_ptr = input_base_ptr + (filter_y_idx * input_wd) * 3; - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - conv_out += (*input_data_ptr++) * (*filter_data_ptr++); - } - - conv_out += *filter_sum++ * input_offset; - - if (bias) { - conv_out += bias[out_ch_idx]; - } - conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]); - conv_out += out_offset; - conv_out = max(conv_out, activation_min); - conv_out = min(conv_out, activation_max); - *out_data++ = (int8_t) conv_out; - } - } - } -} - -int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const conv_params_t *conv_params) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t in_ch = input_dims->channels; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t out_ch = output_dims->channels; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - - int filter_size = filter_wd * filter_ht * in_ch * out_ch; - int input_size = input_wd * input_ht * in_ch; - - int transpose_buf_size = 2 * (8 * in_ch); /* to store intermediate data */ - if (input_wd * input_ht < 8) { - transpose_buf_size = 0; // not using this for leftover - } - int align_buf_size = 32; /* extra buffer for alignment */ - if (in_ch % 8 == 0 && filter_wd == 1 && filter_ht == 1 && - pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) { - return filter_size + transpose_buf_size + align_buf_size; - } - return 2 * (filter_size + input_size) + transpose_buf_size + align_buf_size; -} - -void esp_nn_set_conv_scratch_buf_esp32s3(void *buf) -{ - scratch_buffer = (int16_t *) buf; -} - -void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims, - const int8_t *input, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t channels = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const uint16_t out_channels = output_dims->channels; - const int32_t *out_shift = quant_data->shift; - const int32_t *out_mult = quant_data->mult; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - - int filter_size = filter_wd * filter_ht * channels * out_channels; - int input_size = input_wd * input_ht * channels; - int align_len = 16 - (filter_size & 15); - int16_t *filter_data16 = scratch_buffer; - int16_t *input_data16 = scratch_buffer + filter_size + align_len; - - if (scratch_buffer == NULL) { - printf("esp_nn_conv error! scratch_buffer not set!\n"); - return; - } - - if (channels % 8 == 0 && filter_wd == 1 && filter_ht == 1 && - pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) { - int8_t *filter_aligned = (int8_t *) scratch_buffer; - int scratch_offset = (int) (filter_aligned + filter_size); - void *scratch_buf = (void *) (scratch_offset + 16 - (scratch_offset & 15)); - memcpy(filter_aligned, filter_data, filter_size); // copy to aligned address - esp_nn_conv_s8_mult8_1x1_esp32s3( - input, input_wd, input_ht, channels, input_offset, filter_aligned, - bias, out_data, out_wd, out_ht, out_channels, out_offset, - out_shift, out_mult, activation_min, activation_max, scratch_buf); - } else if (channels % 4 == 0 && filter_wd == 1 && filter_ht == 1 && - (input_wd * input_ht) % 4 == 0 && /* TODO: remove this check */ - pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) { - int scratch_offset = (int) (input_data16 + input_size); - void *scratch_buf = (void *) (scratch_offset + 16 - (scratch_offset & 15)); - esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size); - esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input, input_data16, input_size, input_offset); - esp_nn_conv_s16_mult4_1x1_esp32s3( - input_data16, input_wd, input_ht, channels, filter_data16, - bias, out_data, out_wd, out_ht, out_channels, out_offset, - out_shift, out_mult, activation_min, activation_max, scratch_buf); - } else if (channels % 8 == 0) { - esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size); - esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input, input_data16, input_size, input_offset); - esp_nn_conv_s16_mult8_esp32s3( - input_data16, input_wd, input_ht, channels, pad_wd, pad_ht, - stride_wd, stride_ht, filter_data16, filter_wd, filter_ht, bias, - out_data, out_wd, out_ht, out_channels, out_offset, out_shift, - out_mult, activation_min, activation_max); - } else if (pad_wd == 0 && pad_ht == 0) { - if (filter_wd == 3 && filter_ht == 3 && channels == 3) { - esp_nn_conv_s8_pad_valid_ch3_3x3(input, input_wd, input_ht, input_offset, - stride_wd, stride_ht, filter_data, bias, - out_data, out_wd, out_ht, out_channels, out_offset, - out_shift, out_mult, activation_min, activation_max); - } else { - esp_nn_conv_s8_pad_valid(input, input_wd, input_ht, channels, input_offset, - stride_wd, stride_ht, filter_data, filter_wd, filter_ht, bias, - out_data, out_wd, out_ht, out_channels, out_offset, out_shift, - out_mult, activation_min, activation_max); - } - } else { - /* Basic unrolled version */ - esp_nn_conv_s8_unrolled(input_dims, input, filter_dims, filter_data, - bias, output_dims, out_data, conv_params, quant_data); - } -} diff --git a/code/components/esp-nn/src/convolution/esp_nn_conv_opt.c b/code/components/esp-nn/src/convolution/esp_nn_conv_opt.c deleted file mode 100644 index be96430e..00000000 --- a/code/components/esp-nn/src/convolution/esp_nn_conv_opt.c +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include - -int esp_nn_get_conv_scratch_size_opt(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const conv_params_t *conv_params) -{ - return 0; -} - -void esp_nn_set_conv_scratch_buf_opt(const void *buf) -{ - -} - -__attribute__ ((noinline)) -static void esp_nn_conv_s8_1x1(const data_dims_t *input_dims, - const int8_t *input_data, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t in_channels = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const uint16_t out_channels = output_dims->channels; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - - for (int32_t in_row = 0; in_row < out_ht * stride_ht; in_row += stride_ht) { - for (int32_t in_col = 0; in_col < out_wd * stride_wd; in_col += stride_wd) { - const int32_t *out_mult = quant_data->mult; - const int32_t *out_shift = quant_data->shift; - const int8_t *filter_ptr = filter_data; - const int8_t *input_base_ptr = input_data + (in_row * input_wd + in_col) * in_channels; - int32_t out_ch_idx = 0; - for (; out_ch_idx < out_channels; out_ch_idx++) { - int32_t conv_out = 0; - - const int8_t *input_ptr = input_base_ptr; - - int32_t in_ch_idx = 0; - for (; in_ch_idx < in_channels - 3; in_ch_idx += 4) { - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - } - for (; in_ch_idx < in_channels; in_ch_idx ++) { - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - } - if (bias) { - conv_out += bias[out_ch_idx]; - } - conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, *out_mult++, *out_shift++); - conv_out += out_offset; - conv_out = max(conv_out, activation_min); - conv_out = min(conv_out, activation_max); - *out_data++ = (int8_t) conv_out; - } - } - } -} - -/** - * Assumption 1: i/p channels == o/p channels - * Assumption 2: Pointers are valid - * Assumption 3: dialation width = 1 - */ -void esp_nn_conv_s8_opt(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - - if (filter_wd == 1 && filter_ht == 1) { - esp_nn_conv_s8_1x1(input_dims, input_data, filter_data, bias, - output_dims, out_data, conv_params, quant_data); - return; - } - - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t in_channels = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const uint16_t out_channels = output_dims->channels; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - - int32_t out_ch_idx, out_y, out_x, filter_y_idx, filter_x_idx; - - for (out_y = 0; out_y < out_ht; out_y++) { - for (out_x = 0; out_x < out_wd; out_x++) { - const int32_t *out_shift = quant_data->shift; - const int32_t *out_mult = quant_data->mult; - for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) { - int32_t conv_out = 0; - - const int32_t base_y = stride_ht * out_y - pad_ht; - const int32_t base_x = stride_wd * out_x - pad_wd; - - const int32_t filter_y_start = max(0, -base_y); - const int32_t filter_x_start = max(0, -base_x); - - const int32_t filter_y_end = min(filter_ht, input_ht - base_y); - const int32_t filter_x_end = min(filter_wd, input_wd - base_x); - - for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t in_row = base_y + filter_y_idx; - const int32_t in_col = base_x + filter_x_idx; - - const int8_t *input_ptr = input_data + - (in_row * input_wd + in_col) * in_channels; - const int8_t *filter_ptr = filter_data + - out_ch_idx * in_channels * filter_ht * filter_wd + - (filter_y_idx * filter_wd + filter_x_idx) * in_channels; - int32_t in_ch_idx = 0; - for (; in_ch_idx < in_channels - 3; in_ch_idx += 4) { - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - } - for (; in_ch_idx < in_channels; in_ch_idx ++) { - conv_out += (*input_ptr++ + input_offset) * *filter_ptr++; - } - } - } - if (bias) { - conv_out += bias[out_ch_idx]; - } - conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, *out_mult++, *out_shift++); - conv_out += out_offset; - conv_out = max(conv_out, activation_min); - conv_out = min(conv_out, activation_max); - *out_data++ = (int8_t) conv_out; - } - } - } -} diff --git a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_ansi.c b/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_ansi.c deleted file mode 100644 index 1cd02e0f..00000000 --- a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_ansi.c +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -int esp_nn_get_depthwise_conv_scratch_size_ansi(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const dw_conv_params_t *conv_params) -{ - return 0; -} - -void esp_nn_set_depthwise_conv_scratch_buf_ansi(const void *buf) -{ - -} - -void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const dw_conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t channels = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const int32_t *out_shift = quant_data->shift; - const int32_t *out_mult = quant_data->mult; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - const uint16_t ch_mult = conv_params->ch_mult; - - int out_idx = 0; - for (int out_y = 0; out_y < out_ht; out_y++) { //height loop - const int16_t base_y = (out_y * stride_ht) - pad_ht; - for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop - const int16_t base_x = (out_x * stride_wd) - pad_wd; - for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop - for (int ch_mult_idx = 0; ch_mult_idx < ch_mult; ch_mult_idx++) { - int32_t result = 0; - const int out_ch_idx = ch_mult_idx + ch_idx * ch_mult; - - /* Select filter so as the point doesn't lie outside block */ - int filter_y_start = max(0, -base_y); - int filter_x_start = max(0, -base_x); - int filter_y_end = min(filter_ht, input_ht - base_y); - int filter_x_end = min(filter_wd, input_wd - base_x); - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; - int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; - int32_t input_val = input_data[input_index] + input_offset; - int32_t filter_val = filter_data[filter_index]; - result += input_val * filter_val; - } - } - if (bias) { - result += bias[out_ch_idx]; - } - result = esp_nn_multiply_by_quantized_mult(result, out_mult[out_ch_idx], out_shift[out_ch_idx]); - result += out_offset; - result = max(result, activation_min); - result = min(result, activation_max); - - out_data[out_idx++] = result; - } - } - } - } -} diff --git a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_opt.c b/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_opt.c deleted file mode 100644 index 4afea3f3..00000000 --- a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_opt.c +++ /dev/null @@ -1,291 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -int esp_nn_get_depthwise_conv_scratch_size_opt(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const dw_conv_params_t *conv_params) -{ - return 0; -} - -void esp_nn_set_depthwise_conv_scratch_buf_opt(const void *buf) -{ - -} - -/* common channel multiplier == 1 case */ -__attribute__ ((noinline)) -static void esp_nn_depthwise_conv_s8_ch_mult_1(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const dw_conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t channels = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - - int out_idx = 0; - for (int out_y = 0; out_y < out_ht; out_y++) { //height loop - const int16_t base_y = (out_y * stride_ht) - pad_ht; - for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop - const int16_t base_x = (out_x * stride_wd) - pad_wd; - - const int32_t *out_shift = quant_data->shift; - const int32_t *out_mult = quant_data->mult; - - /* Select filter so as the point doesn't lie outside block */ - int filter_y_start = max(0, -base_y); - int filter_x_start = max(0, -base_x); - int filter_y_end = min(filter_ht, input_ht - base_y); - int filter_x_end = min(filter_wd, input_wd - base_x); - - int ch_idx = 0; - for (; ch_idx < channels - 3; ch_idx += 4) {//channel_loop - int32_t result0 = 0; - int32_t result1 = 0; - int32_t result2 = 0; - int32_t result3 = 0; - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; - int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels) + ch_idx; - int32_t input_val0 = input_data[input_index + 0] + input_offset; - int32_t input_val1 = input_data[input_index + 1] + input_offset; - int32_t input_val2 = input_data[input_index + 2] + input_offset; - int32_t input_val3 = input_data[input_index + 3] + input_offset; - int32_t filter_val0 = filter_data[filter_index + 0]; - int32_t filter_val1 = filter_data[filter_index + 1]; - int32_t filter_val2 = filter_data[filter_index + 2]; - int32_t filter_val3 = filter_data[filter_index + 3]; - result0 += input_val0 * filter_val0; - result1 += input_val1 * filter_val1; - result2 += input_val2 * filter_val2; - result3 += input_val3 * filter_val3; - } - } - if (bias) { - result0 += bias[ch_idx + 0]; - result1 += bias[ch_idx + 1]; - result2 += bias[ch_idx + 2]; - result3 += bias[ch_idx + 3]; - } - result0 = esp_nn_multiply_by_quantized_mult_fast(result0, *out_mult++, *out_shift++); - result1 = esp_nn_multiply_by_quantized_mult_fast(result1, *out_mult++, *out_shift++); - result2 = esp_nn_multiply_by_quantized_mult_fast(result2, *out_mult++, *out_shift++); - result3 = esp_nn_multiply_by_quantized_mult_fast(result3, *out_mult++, *out_shift++); - - result0 += out_offset; - result1 += out_offset; - result2 += out_offset; - result3 += out_offset; - - result0 = max(result0, activation_min); - result1 = max(result1, activation_min); - result2 = max(result2, activation_min); - result3 = max(result3, activation_min); - - result0 = min(result0, activation_max); - result1 = min(result1, activation_max); - result2 = min(result2, activation_max); - result3 = min(result3, activation_max); - - out_data[out_idx++] = result0; - out_data[out_idx++] = result1; - out_data[out_idx++] = result2; - out_data[out_idx++] = result3; - } - for (; ch_idx < channels; ch_idx++) {//channel_loop - int32_t result = 0; - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; - int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels) + ch_idx; - int32_t input_val = input_data[input_index] + input_offset; - int32_t filter_val = filter_data[filter_index]; - result += input_val * filter_val; - } - } - if (bias) { - result += bias[ch_idx]; - } - result = esp_nn_multiply_by_quantized_mult_fast(result, *out_mult++, *out_shift++); - result += out_offset; - result = max(result, activation_min); - result = min(result, activation_max); - - out_data[out_idx++] = result; - } - } - } -} - -void esp_nn_depthwise_conv_s8_opt(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const dw_conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t ch_mult = conv_params->ch_mult; - if (ch_mult == 1) { - esp_nn_depthwise_conv_s8_ch_mult_1(input_dims, input_data, filter_dims, filter_data, - bias, output_dims, out_data, conv_params, quant_data); - return; - } - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t channels = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - - int out_idx = 0; - for (int out_y = 0; out_y < out_ht; out_y++) { //height loop - const int16_t base_y = (out_y * stride_ht) - pad_ht; - for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop - const int16_t base_x = (out_x * stride_wd) - pad_wd; - - const int32_t *out_shift = quant_data->shift; - const int32_t *out_mult = quant_data->mult; - - /* Select filter so as the point doesn't lie outside block */ - int filter_y_start = max(0, -base_y); - int filter_x_start = max(0, -base_x); - int filter_y_end = min(filter_ht, input_ht - base_y); - int filter_x_end = min(filter_wd, input_wd - base_x); - - for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop - int ch_mult_idx = 0; - for (; ch_mult_idx < ch_mult - 3; ch_mult_idx += 4) { - int32_t result0 = 0; - int32_t result1 = 0; - int32_t result2 = 0; - int32_t result3 = 0; - const int out_ch_idx = ch_idx * ch_mult + ch_mult_idx; - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; - int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; - int32_t input_val = input_data[input_index] + input_offset; - int32_t filter_val0 = filter_data[filter_index + 0]; - int32_t filter_val1 = filter_data[filter_index + 1]; - int32_t filter_val2 = filter_data[filter_index + 2]; - int32_t filter_val3 = filter_data[filter_index + 3]; - result0 += input_val * filter_val0; - result1 += input_val * filter_val1; - result2 += input_val * filter_val2; - result3 += input_val * filter_val3; - } - } - if (bias) { - result0 += bias[out_ch_idx + 0]; - result1 += bias[out_ch_idx + 1]; - result2 += bias[out_ch_idx + 2]; - result3 += bias[out_ch_idx + 3]; - } - result0 = esp_nn_multiply_by_quantized_mult_fast(result0, *out_mult++, *out_shift++); - result1 = esp_nn_multiply_by_quantized_mult_fast(result1, *out_mult++, *out_shift++); - result2 = esp_nn_multiply_by_quantized_mult_fast(result2, *out_mult++, *out_shift++); - result3 = esp_nn_multiply_by_quantized_mult_fast(result3, *out_mult++, *out_shift++); - - result0 += out_offset; - result1 += out_offset; - result2 += out_offset; - result3 += out_offset; - - result0 = max(result0, activation_min); - result1 = max(result1, activation_min); - result2 = max(result2, activation_min); - result3 = max(result3, activation_min); - result0 = min(result0, activation_max); - result1 = min(result1, activation_max); - result2 = min(result2, activation_max); - result3 = min(result3, activation_max); - - out_data[out_idx++] = result0; - out_data[out_idx++] = result1; - out_data[out_idx++] = result2; - out_data[out_idx++] = result3; - } - for (; ch_mult_idx < ch_mult; ch_mult_idx++) { - int32_t result = 0; - const int out_ch_idx = ch_idx * ch_mult + ch_mult_idx; - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; - int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; - int32_t input_val = input_data[input_index] + input_offset; - int32_t filter_val = filter_data[filter_index]; - result += input_val * filter_val; - } - } - if (bias) { - result += bias[out_ch_idx]; - } - result = esp_nn_multiply_by_quantized_mult_fast(result, *out_mult++, *out_shift++); - result += out_offset; - result = max(result, activation_min); - result = min(result, activation_max); - - out_data[out_idx++] = result; - } - } - } - } -} diff --git a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c b/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c deleted file mode 100644 index 9167a43f..00000000 --- a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c +++ /dev/null @@ -1,543 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include - -static int16_t *scratch_buffer = NULL; - -extern void esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3(const int16_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t ch_mult, - const int16_t *filter_data, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max); - -extern void esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(const int8_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const int32_t input_offset, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int8_t *filter_data, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max); - -extern void esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3(const int16_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int16_t *filter_data, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max); - -extern void esp_nn_depthwise_conv_s16_mult8_esp32s3(const int16_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t ch_mult, - const int16_t *filter_data, - const uint16_t filter_wd, - const uint16_t filter_ht, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max); - -extern void esp_nn_depthwise_conv_s16_mult4_esp32s3(const int16_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t ch_mult, - const int16_t *filter_data, - const uint16_t filter_wd, - const uint16_t filter_ht, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max); - -extern void esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3(const int16_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int16_t *filter_data, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max); - -extern void esp_nn_depthwise_conv_s16_mult1_esp32s3(const int16_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int16_t *filter_data, - const uint16_t filter_wd, - const uint16_t filter_ht, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max); - -extern void esp_nn_s8_to_s16_esp32s3(const int8_t *src, int16_t *dst, const int size); - -extern void esp_nn_aligned_s8_to_s16_with_offset_esp32s3(const int8_t *src, int16_t *dst, - const int size, const int32_t offset); - -static void esp_nn_depthwise_conv_s8_unrolled(const int8_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const int32_t input_offset, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t ch_mult, - const int8_t *filter_data, - const uint16_t filter_wd, - const uint16_t filter_ht, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max) -{ - int out_idx = 0; - for (int out_y = 0; out_y < out_ht; out_y++) { //height loop - const int16_t base_y = (out_y * stride_ht) - pad_ht; - for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop - const int16_t base_x = (out_x * stride_wd) - pad_wd; - for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop - int ch_mult_idx = 0; - for (; ch_mult_idx < ch_mult - 3; ch_mult_idx += 4) { - int32_t result0 = 0, result1 = 0, result2 = 0, result3 = 0; - const int out_ch_idx = ch_mult_idx + ch_idx * ch_mult; - - /* Select filter so as the point doesn't lie outside block */ - int filter_y_start = max(0, -base_y); - int filter_x_start = max(0, -base_x); - int filter_y_end = min(filter_ht, input_ht - base_y); - int filter_x_end = min(filter_wd, input_wd - base_x); - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; - int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; - int32_t input_val = input_data[input_index] + input_offset; - int32_t filter_val0 = filter_data[filter_index + 0]; - int32_t filter_val1 = filter_data[filter_index + 1]; - int32_t filter_val2 = filter_data[filter_index + 2]; - int32_t filter_val3 = filter_data[filter_index + 3]; - result0 += input_val * filter_val0; - result1 += input_val * filter_val1; - result2 += input_val * filter_val2; - result3 += input_val * filter_val3; - } - } - if (bias) { - result0 += bias[out_ch_idx + 0]; - result1 += bias[out_ch_idx + 1]; - result2 += bias[out_ch_idx + 2]; - result3 += bias[out_ch_idx + 3]; - } - result0 = esp_nn_multiply_by_quantized_mult(result0, - out_mult[out_ch_idx + 0], out_shift[out_ch_idx + 0]); - result1 = esp_nn_multiply_by_quantized_mult(result1, - out_mult[out_ch_idx + 1], out_shift[out_ch_idx + 1]); - result2 = esp_nn_multiply_by_quantized_mult(result2, - out_mult[out_ch_idx + 2], out_shift[out_ch_idx + 2]); - result3 = esp_nn_multiply_by_quantized_mult(result3, - out_mult[out_ch_idx + 3], out_shift[out_ch_idx + 3]); - - result0 += out_offset; - result1 += out_offset; - result2 += out_offset; - result3 += out_offset; - - result0 = max(result0, activation_min); - result1 = max(result1, activation_min); - result2 = max(result2, activation_min); - result3 = max(result3, activation_min); - - result0 = min(result0, activation_max); - result1 = min(result1, activation_max); - result2 = min(result2, activation_max); - result3 = min(result3, activation_max); - - out_data[out_idx++] = result0; - out_data[out_idx++] = result1; - out_data[out_idx++] = result2; - out_data[out_idx++] = result3; - } - - /* left-over */ - for (; ch_mult_idx < ch_mult; ch_mult_idx++) { - int32_t result = 0; - const int out_ch_idx = ch_mult_idx + ch_idx * ch_mult; - - /* Select filter so as the point doesn't lie outside block */ - int filter_y_start = max(0, -base_y); - int filter_x_start = max(0, -base_x); - int filter_y_end = min(filter_ht, input_ht - base_y); - int filter_x_end = min(filter_wd, input_wd - base_x); - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; - int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx; - int32_t input_val = input_data[input_index] + input_offset; - int32_t filter_val = filter_data[filter_index]; - result += input_val * filter_val; - } - } - if (bias) { - result += bias[out_ch_idx]; - } - result = esp_nn_multiply_by_quantized_mult(result, out_mult[out_ch_idx], out_shift[out_ch_idx]); - result += out_offset; - result = max(result, activation_min); - result = min(result, activation_max); - - out_data[out_idx++] = result; - } - } - } - } -} - -void esp_nn_depthwise_conv_s8_ch_mult1(const int8_t *input_data, - const uint16_t input_wd, - const uint16_t input_ht, - const uint16_t channels, - const int32_t input_offset, - const uint16_t pad_wd, - const uint16_t pad_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const int8_t *filter_data, - const uint16_t filter_wd, - const uint16_t filter_ht, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_wd, - const uint16_t out_ht, - const int32_t out_offset, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t activation_min, - const int32_t activation_max) -{ - int out_idx = 0; - for (int out_y = 0; out_y < out_ht; out_y++) { //height loop - const int16_t base_y = (out_y * stride_ht) - pad_ht; - for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop - const int16_t base_x = (out_x * stride_wd) - pad_wd; - for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop - int32_t result = 0; - /* Select filter so as the point doesn't lie outside block */ - int filter_y_start = max(0, -base_y); - int filter_x_start = max(0, -base_x); - int filter_y_end = min(filter_ht, input_ht - base_y); - int filter_x_end = min(filter_wd, input_wd - base_x); - - for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) { - const int32_t idx_y = base_y + filter_y_idx; - for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) { - const int32_t idx_x = base_x + filter_x_idx; - int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx; - int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * channels + ch_idx; - int32_t input_val = input_data[input_index] + input_offset; - int32_t filter_val = filter_data[filter_index]; - result += input_val * filter_val; - } - } - if (bias) { - result += bias[ch_idx]; - } - result = esp_nn_multiply_by_quantized_mult(result, out_mult[ch_idx], out_shift[ch_idx]); - result += out_offset; - result = max(result, activation_min); - result = min(result, activation_max); - - out_data[out_idx++] = result; - } - } - } -} - -int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const data_dims_t *input_dims, - const data_dims_t *filter_dims, - const data_dims_t *output_dims, - const dw_conv_params_t *conv_params) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t channels = input_dims->channels; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t ch_mult = conv_params->ch_mult; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - - int filter_size = filter_wd * filter_ht * channels * ch_mult; - int pad_width = 0, pad_height = 0; - - if ((ch_mult == 1) && (channels % 8 == 0) && (filter_wd == 3) && (filter_ht == 3)) { - if (channels % 16 == 0) { - if (pad_wd || pad_ht) { - pad_width = pad_wd * 2; - pad_height = pad_ht * 2; - } else { - // check if we need to pad additionally - pad_width = (out_wd * stride_wd + filter_wd - 1) - input_wd; - pad_height = (out_ht * stride_ht + filter_ht - 1) - input_ht; - // printf("in(%d %d %d), out(%d %d), filter (%d %d) stride (%d %d), pad (%d %d)", - // input_wd, input_ht, channels, out_wd, out_ht, filter_wd, filter_ht, - // stride_wd, stride_ht, pad_wd, pad_ht); - } - if (pad_width || pad_height) { - int input_size = (input_wd + pad_width) * (input_ht + pad_height) * channels; - // printf("ask1 %d\n", filter_size + input_size + 16); - return filter_size + input_size + 16; // 16 for alignment - } else { - // printf("ask2 %d\n", filter_size + 16); - return filter_size + 16; // 16 for alignment - } - } else { - int input_size = input_wd * input_ht * channels; - // printf("ask3 %d\n", 2 * (filter_size + input_size) + 16); - return 2 * (filter_size + input_size) + 16; // 16 for alignment - } - } else if (ch_mult % 4 == 0) { - int input_size = input_wd * input_ht * channels; - // printf("ask4 %d\n", 2 * (filter_size + input_size) + 16); - return 2 * (filter_size + input_size) + 16; // 16 for alignment - } - return 32; // just few bytes -} - -void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(void *buf) -{ - scratch_buffer = (int16_t *) buf; -} - -/** - * Assumption 1: i/p channels == o/p channels - * Assumption 2: Pointers are valid - * Assumption 3: dialation width = 1 - */ - - - -void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims, - const int8_t *input_data, - const data_dims_t *filter_dims, - const int8_t *filter_data, - const int32_t *bias, - const data_dims_t *output_dims, - int8_t *out_data, - const dw_conv_params_t *conv_params, - const quant_data_t *quant_data) -{ - const uint16_t input_wd = input_dims->width; - const uint16_t input_ht = input_dims->height; - const uint16_t channels = input_dims->channels; - const int32_t input_offset = conv_params->in_offset; - const int32_t out_offset = conv_params->out_offset; - const uint16_t pad_wd = conv_params->padding.width; - const uint16_t pad_ht = conv_params->padding.height; - const uint16_t stride_wd = conv_params->stride.width; - const uint16_t stride_ht = conv_params->stride.height; - const uint16_t filter_wd = filter_dims->width; - const uint16_t filter_ht = filter_dims->height; - const uint16_t out_wd = output_dims->width; - const uint16_t out_ht = output_dims->height; - const int32_t *out_shift = quant_data->shift; - const int32_t *out_mult = quant_data->mult; - const int32_t activation_min = conv_params->activation.min; - const int32_t activation_max = conv_params->activation.max; - const uint16_t ch_mult = conv_params->ch_mult; - - int filter_size = filter_wd * filter_ht * channels * ch_mult; - int align_len = 16 - (filter_size & 15); - int input_size = input_wd * input_ht * channels; - int16_t *filter_data16 = scratch_buffer; - int16_t *input_data16 = scratch_buffer + filter_size + align_len; - if (scratch_buffer == NULL) { - printf("esp_nn_depthwise_conv error! scratch_buffer not set!\n"); - return; - } - - if ((ch_mult == 1) && (channels % 8 == 0)) { - if ((filter_wd == 3) && (filter_ht == 3)) { - if ((channels % 16 == 0) && (pad_wd == 1) && (pad_ht == 1)) { - /* process in 8 bits */ - int8_t *filter_aligned = (int8_t *) scratch_buffer; - int8_t *input_padded = (int8_t *) scratch_buffer + filter_size + align_len; - memcpy(filter_aligned, filter_data, filter_size); - esp_nn_aligned_s8_pad_with_value(input_data, input_padded, input_wd, input_ht, channels, - -input_offset, pad_wd, pad_ht); - esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(input_padded, input_wd + 2 * pad_wd, - input_ht + 2 * pad_ht, channels, input_offset, - stride_wd, stride_ht, filter_aligned, bias, - out_data, out_wd, out_ht, out_offset, out_shift, - out_mult, activation_min, activation_max); - } else if ((channels % 16 == 0) && (pad_wd == 0) && (pad_ht == 0)) { - /* process in 8 bits */ - int8_t *filter_aligned = (int8_t *) scratch_buffer; - int8_t *input_padded = (int8_t *) scratch_buffer + filter_size + align_len; - - // check if we need to pad additionally - int pad_right = (out_wd * stride_wd + filter_wd - 1) - input_wd; - int pad_bottom = (out_ht * stride_ht + filter_ht - 1) - input_ht; - if (pad_right || pad_bottom) { // pad right and bottom - esp_nn_aligned_s8_pad_end_with_value(input_data, input_padded, input_wd, input_ht, - channels, -input_offset, pad_right, pad_bottom); - } else { - input_padded = (int8_t *) input_data; - } - memcpy(filter_aligned, filter_data, filter_size); - esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(input_padded, input_wd + pad_right, - input_ht + pad_bottom, channels, input_offset, - stride_wd, stride_ht, filter_aligned, bias, - out_data, out_wd, out_ht, out_offset, out_shift, - out_mult, activation_min, activation_max); - } else { /* (channels % 8) == 0 */ - esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size); - esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset); - esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3(input_data16, input_wd, input_ht, channels, - pad_wd, pad_ht, stride_wd, stride_ht, filter_data16, - bias, out_data, out_wd, out_ht, out_offset, out_shift, - out_mult, activation_min, activation_max); - } - } else { // all other ch_mult == 1, `channels % 8 == 0` - esp_nn_depthwise_conv_s8_ch_mult1(input_data, input_wd, input_ht, channels, input_offset, - pad_wd, pad_ht, stride_wd, stride_ht, - filter_data, filter_wd, filter_ht, - bias, out_data, out_wd, out_ht, out_offset, out_shift, - out_mult, activation_min, activation_max); - } - } else if (ch_mult % 8 == 0) { - esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size); - esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset); - if (filter_wd == 3 && filter_ht == 3) { - esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3(input_data16, input_wd, input_ht, channels, - pad_wd, pad_ht, stride_wd, stride_ht, ch_mult, - filter_data16, bias, - out_data, out_wd, out_ht, out_offset, out_shift, - out_mult, activation_min, activation_max); - } else { - esp_nn_depthwise_conv_s16_mult8_esp32s3(input_data16, input_wd, input_ht, channels, - pad_wd, pad_ht, stride_wd, stride_ht, ch_mult, - filter_data16, filter_wd, filter_ht, bias, - out_data, out_wd, out_ht, out_offset, out_shift, - out_mult, activation_min, activation_max); - } - } else if (ch_mult % 4 == 0) { - esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size); - esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset); - esp_nn_depthwise_conv_s16_mult4_esp32s3(input_data16, input_wd, input_ht, channels, - pad_wd, pad_ht, stride_wd, stride_ht, ch_mult, - filter_data16, filter_wd, filter_ht, bias, - out_data, out_wd, out_ht, out_offset, out_shift, - out_mult, activation_min, activation_max); - } else { - esp_nn_depthwise_conv_s8_unrolled(input_data, input_wd, input_ht, channels, input_offset, - pad_wd, pad_ht, stride_wd, stride_ht, ch_mult, - filter_data, filter_wd, filter_ht, - bias, out_data, out_wd, out_ht, out_offset, out_shift, - out_mult, activation_min, activation_max); - } -} diff --git a/code/components/esp-nn/src/fully_connected/esp_nn_fully_connected_ansi.c b/code/components/esp-nn/src/fully_connected/esp_nn_fully_connected_ansi.c deleted file mode 100644 index 6d800bc5..00000000 --- a/code/components/esp-nn/src/fully_connected/esp_nn_fully_connected_ansi.c +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include - -void esp_nn_fully_connected_s8_ansi(const int8_t *input_data, - const int32_t input_offset, - const uint16_t row_len, - const int8_t *filter_data, - const int32_t filter_offset, - const int32_t *bias, - int8_t *out_data, - const uint16_t out_channels, - const int32_t out_offset, - const int32_t out_shift, - const int32_t out_mult, - const int32_t activation_min, - const int32_t activation_max) -{ - for (int32_t out_c = 0; out_c < out_channels; ++out_c) { - int32_t result = 0; - for (int32_t data_idx = 0; data_idx < row_len; data_idx++) { - int32_t filter_index = row_len * out_c + data_idx; - int32_t input_val = input_data[data_idx]; - int32_t filter_val = filter_data[filter_index]; - result += (filter_val + filter_offset) * (input_val + input_offset); - } - if (bias) { - result += bias[out_c]; - } - result = esp_nn_multiply_by_quantized_mult(result, out_mult, out_shift); - result += out_offset; - result = max(result, activation_min); - result = min(result, activation_max); - out_data[out_c] = (int8_t) result; - } -} diff --git a/code/components/esp-nn/src/pooling/esp_nn_avg_pool_ansi.c b/code/components/esp-nn/src/pooling/esp_nn_avg_pool_ansi.c deleted file mode 100644 index 03846aa0..00000000 --- a/code/components/esp-nn/src/pooling/esp_nn_avg_pool_ansi.c +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include - -void esp_nn_avg_pool_s8_ansi(const int8_t *input, - const uint16_t input_wd, - const uint16_t input_ht, - int8_t *output, - const uint16_t output_wd, - const uint16_t output_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t filter_wd, - const uint16_t filter_ht, - const uint16_t pad_wd, - const uint16_t pad_ht, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t channels) -{ - int32_t base_y = -pad_ht; - for (int32_t out_y = 0; out_y < output_ht; out_y++, base_y += stride_ht) { - int32_t base_x = -pad_wd; - for (int32_t out_x = 0; out_x < output_wd; out_x++, base_x += stride_wd) { - for (int32_t ch_idx = 0; ch_idx < channels; ch_idx++) { - int32_t result = 0; - int32_t filter_cnt = 0; - /* Make sure filter does not cross the input box */ - int32_t filter_y_start = max(0, -base_y); - int32_t filter_x_start = max(0, -base_x); - - int32_t filter_y_end = min(filter_ht, input_ht - base_y); - int32_t filter_x_end = min(filter_wd, input_wd - base_x); - - for (int32_t filter_y = filter_y_start; filter_y < filter_y_end; filter_y++) { - for (int32_t filter_x = filter_x_start; filter_x < filter_x_end; filter_x++) { - int32_t in_x_idx = base_x + filter_x; - int32_t in_y_idx = base_y + filter_y; - int32_t input_index = (in_y_idx * input_wd + in_x_idx) * channels + ch_idx; - result += input[input_index]; - filter_cnt++; - } - } - - /* Rounded average */ - result = result > 0 ? (result + filter_cnt / 2) / filter_cnt - : (result - filter_cnt / 2) / filter_cnt; - - /* Activation function */ - result = max(result, activation_min); - result = min(result, activation_max); - - int32_t output_index = (out_y * output_wd + out_x) * channels + ch_idx; - output[output_index] = (int8_t) result; - } - } - } -} diff --git a/code/components/esp-nn/src/pooling/esp_nn_max_pool_ansi.c b/code/components/esp-nn/src/pooling/esp_nn_max_pool_ansi.c deleted file mode 100644 index 4ca5c42d..00000000 --- a/code/components/esp-nn/src/pooling/esp_nn_max_pool_ansi.c +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include - -void esp_nn_max_pool_s8_ansi(const int8_t *input, - const uint16_t input_wd, - const uint16_t input_ht, - int8_t *output, - const uint16_t output_wd, - const uint16_t output_ht, - const uint16_t stride_wd, - const uint16_t stride_ht, - const uint16_t filter_wd, - const uint16_t filter_ht, - const uint16_t pad_wd, - const uint16_t pad_ht, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t channels) -{ - int32_t base_y = -pad_ht; - for (int32_t out_y = 0; out_y < output_ht; out_y++, base_y += stride_ht) { - int32_t base_x = -pad_wd; - for (int32_t out_x = 0; out_x < output_wd; out_x++, base_x += stride_wd) { - /* Make sure filter does not cross the input box */ - int32_t filter_y_start = max(0, -base_y); - int32_t filter_x_start = max(0, -base_x); - int32_t filter_y_end = min(filter_ht, input_ht - base_y); - int32_t filter_x_end = min(filter_wd, input_wd - base_x); - - for (int32_t ch_idx = 0; ch_idx < channels; ch_idx++) { - int8_t result = INT8_MIN; - - for (int32_t filter_y = filter_y_start; filter_y < filter_y_end; filter_y++) { - for (int32_t filter_x = filter_x_start; filter_x < filter_x_end; filter_x++) { - int32_t in_x_idx = base_x + filter_x; - int32_t in_y_idx = base_y + filter_y; - int32_t input_index = (in_y_idx * input_wd + in_x_idx) * channels + ch_idx; - result = max(input[input_index], result); - } - } - - /* Activation function */ - result = max(result, activation_min); - result = min(result, activation_max); - - int32_t output_index = (out_y * output_wd + out_x) * channels + ch_idx; - output[output_index] = result; - } - } - } -} diff --git a/code/components/esp-nn/src/softmax/esp_nn_softmax_ansi.c b/code/components/esp-nn/src/softmax/esp_nn_softmax_ansi.c deleted file mode 100644 index d71a8616..00000000 --- a/code/components/esp-nn/src/softmax/esp_nn_softmax_ansi.c +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2022 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "softmax_common.h" - -int32_t esp_nn_get_softmax_scratch_size_ansi(const int32_t width, const int32_t height) -{ - (void) width; - (void) height; - return 0; -} - -void esp_nn_set_softmax_scratch_buf_ansi(void *buffer) -{ - (void) buffer; - return; -} - -void esp_nn_softmax_s8_ansi(const int8_t *input_data, - const int32_t height, - const int32_t width, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int8_t *output_data) -{ - // The representation chosen for the input to the exp() function is Q5.26. - // We need to leave extra space since values that we skip might be as large as - // -32 before multiplying by input mult, and therefore as large as - // -16 afterwards. Note that exp(-8) is definitely not insignificant to - // accumulation, but exp(-16) definitely is. -#define ACCUM_BITS 12 -#define DIFF_BITS 5 - - const int32_t mask = (1 << shift); - int32_t col = 0; - const int8_t *in_ptr = input_data; - int8_t *out_ptr = output_data; - - for (int row_idx = 0; row_idx < height; row_idx++) { - int8_t max_in_row = in_ptr[0]; - for (col = 1; col < width; col++) { - max_in_row = max(max_in_row, in_ptr[col]); - } - - int32_t input_diff = 0; - int32_t sum_of_exps = 0; - - for (col = 0; col < width; col++) { - input_diff = in_ptr[col] - max_in_row; - if (input_diff >= diff_min) { - const int32_t input_diff_rescaled = SAT_HIGH_MUL(input_diff * mask, mult); - const int32_t exp_raw = esp_nn_exp_on_negative_values(input_diff_rescaled); - sum_of_exps += DIV_POW2(exp_raw, ACCUM_BITS); - } - } - - const int32_t headroom_plus1 = esp_nn_clz32((uint32_t) sum_of_exps); - const int32_t shifted_scale = ONE_OVER_ONE_X((sum_of_exps << headroom_plus1) - (1 << 31)); - const int32_t bits_over_unit = ACCUM_BITS - headroom_plus1 + 31 - sizeof(int8_t) * 8; - - for (col = 0; col < width; col++) { - input_diff = in_ptr[col] - max_in_row; - if (input_diff >= diff_min) { - const int32_t input_diff_rescaled = SAT_HIGH_MUL(input_diff * mask, mult); - const int32_t exp_raw = esp_nn_exp_on_negative_values(input_diff_rescaled); - const int32_t shifted_output = SAT_HIGH_MUL(shifted_scale, exp_raw); - const int32_t result = DIV_POW2(shifted_output, bits_over_unit) - 128; - out_ptr[col] = (int8_t) esp_nn_saturate8(result); - } else { - out_ptr[col] = -128; - } - } - in_ptr += width; - out_ptr += width; - } -} diff --git a/code/components/esp-nn/src/softmax/esp_nn_softmax_opt.c b/code/components/esp-nn/src/softmax/esp_nn_softmax_opt.c deleted file mode 100644 index 93337d32..00000000 --- a/code/components/esp-nn/src/softmax/esp_nn_softmax_opt.c +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2022 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "softmax_common.h" -#include - -static int32_t *scratch_buf = NULL; - -/** - * @brief Get scratch buffer size needed by softmax function - * - * @param width - * @param height - * @return size in bytes - * - * @note buffer must be 4 byte aligned - */ -int32_t esp_nn_get_softmax_scratch_size_opt(const int32_t width, const int32_t height) -{ - (void) height; - return width * 4; -} - -/** - * @brief Set scratch buffer to be used by softmax function - * - * @param buffer this can be NULL if one needs to unset it - * must be aligned to 4 bytes - */ -void esp_nn_set_softmax_scratch_buf_opt(void *buffer) -{ - scratch_buf = (int32_t *) buffer; -} - -void esp_nn_softmax_s8_opt(const int8_t *input_data, - const int32_t height, - const int32_t width, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int8_t *output_data) -{ - if (scratch_buf == NULL) { - printf("%s error! scratch buffer not set\n", __FUNCTION__); - return; - } - // The representation chosen for the input to the exp() function is Q5.26. - // We need to leave extra space since values that we skip might be as large as - // -32 before multiplying by input mult, and therefore as large as - // -16 afterwards. Note that exp(-8) is definitely not insignificant to - // accumulation, but exp(-16) definitely is. -#define ACCUM_BITS 12 -#define DIFF_BITS 5 - - const int32_t mask = (1 << shift); - int32_t col = 0; - const int8_t *in_ptr = input_data; - int8_t *out_ptr = output_data; - - for (int row_idx = 0; row_idx < height; row_idx++) { - int8_t max_in_row = in_ptr[0]; - for (col = 1; col < width; col++) { - max_in_row = max(max_in_row, in_ptr[col]); - } - - int32_t input_diff = 0; - int32_t sum_of_exps = 0; - - for (col = 0; col < width; col++) { - input_diff = in_ptr[col] - max_in_row; - if (input_diff >= diff_min) { - const int32_t input_diff_rescaled = SAT_HIGH_MUL(input_diff * mask, mult); - const int32_t exp_raw = esp_nn_exp_on_negative_values(input_diff_rescaled); - scratch_buf[col] = exp_raw; // store to avoid duplicate calculation later - sum_of_exps += DIV_POW2(exp_raw, ACCUM_BITS); - } - } - - const int32_t headroom_plus1 = esp_nn_clz32((uint32_t) sum_of_exps); - const int32_t shifted_scale = ONE_OVER_ONE_X((sum_of_exps << headroom_plus1) - (1 << 31)); - const int32_t bits_over_unit = ACCUM_BITS - headroom_plus1 + 31 - sizeof(int8_t) * 8; - - for (col = 0; col < width; col++) { - input_diff = in_ptr[col] - max_in_row; - if (input_diff >= diff_min) { - int32_t exp_raw = scratch_buf[col]; - const int32_t shifted_output = SAT_HIGH_MUL(shifted_scale, exp_raw); - const int32_t result = DIV_POW2(shifted_output, bits_over_unit) - 128; - out_ptr[col] = (int8_t) esp_nn_saturate8(result); - } else { - out_ptr[col] = -128; - } - } - in_ptr += width; - out_ptr += width; - } -} diff --git a/code/components/esp-nn/src/softmax/softmax_common.h b/code/components/esp-nn/src/softmax/softmax_common.h deleted file mode 100644 index 254d6ace..00000000 --- a/code/components/esp-nn/src/softmax/softmax_common.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2022 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#define MASK_IF_ZERO(x) (x) == 0 ? ~0 : 0 -#define MASK_IF_NON_ZERO(x) (x) != 0 ? ~0 : 0 -#define SELECT_USING_MASK(mask, a, b) ((mask) & (a)) ^ (~(mask) & (b)) -#define SAT_HIGH_MUL(x, y) esp_nn_sat_round_doubling_high_mul((x), (y)) -#define DIV_POW2(x,y) esp_nn_div_by_power_of_two((x), (y)) - -__NN_FORCE_INLINE__ int32_t mul_power_of_2(int val, int exp) -{ - const int32_t thresh = ((1 << (31 - exp)) - 1); - int32_t result = val << exp; - result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), INT32_MAX, result); - result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), INT32_MIN, result); - return result; -} - -/** - * @brief Calculate `1 / (1 + x)` for x in [0, 1] - * - * @param val input value to calculate `1/(1+x)` for - * @return `int32_t` result - * @note Newton-Raphson division - * - * https://en.wikipedia.org/wiki/Division_algorithm#Newton.E2.80.93Raphson_division - * Refer to that page for the logic behind the 48/17 and 32/17 constants. - * Pseudocode: https://en.wikipedia.org/wiki/Division_algorithm#Pseudocode - */ -__NN_FORCE_INLINE__ int32_t esp_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val) -{ - const int64_t sum = (int64_t) val + INT32_MAX; - const int32_t half_denominator = (int32_t) ((sum + (sum >= 0 ? 1 : -1)) / 2L); - int32_t constant_48_over_17 = 1515870810; - int32_t constant_neg_32_over_17 = -1010580540; - int32_t x = constant_48_over_17 + SAT_HIGH_MUL(half_denominator, constant_neg_32_over_17); - const int32_t fixed_2_one = (1 << 29); - - x += mul_power_of_2(SAT_HIGH_MUL(x, fixed_2_one - SAT_HIGH_MUL(half_denominator, x)), 2); - x += mul_power_of_2(SAT_HIGH_MUL(x, fixed_2_one - SAT_HIGH_MUL(half_denominator, x)), 2); - x += mul_power_of_2(SAT_HIGH_MUL(x, fixed_2_one - SAT_HIGH_MUL(half_denominator, x)), 2); - - return mul_power_of_2(x, 1); -} - -#define ONE_OVER_ONE_X(x) esp_nn_one_over_one_plus_x_for_x_in_0_1((x)) - -/** - * @brief Return exp(x) for x < 0. - * - */ -__NN_FORCE_INLINE__ int32_t esp_nn_exp_on_negative_values(int32_t val) -{ - int32_t shift = 24; - - const int32_t one_quarter = (1 << shift); - int32_t mask = one_quarter - 1; - const int32_t val_mod_minus_quarter = (val & mask) - one_quarter; - const int32_t remainder = val_mod_minus_quarter - val; - - // calculate exponent for x in [-1/4, 0) in `result` - const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28); - const int32_t x2 = SAT_HIGH_MUL(x, x); - const int32_t x3 = SAT_HIGH_MUL(x2, x); - const int32_t x4 = SAT_HIGH_MUL(x2, x2); - const int32_t one_over_3 = 715827883; - const int32_t one_over_8 = 1895147668; - - const int32_t x4_over_4 = DIV_POW2(x4, 2); - const int32_t x4_over_4_plus_x3_over_6_plus_x2_over_2 = DIV_POW2(SAT_HIGH_MUL(x4_over_4 + x3, one_over_3) + x2, 1); - int32_t result = one_over_8 + SAT_HIGH_MUL(one_over_8, x + x4_over_4_plus_x3_over_6_plus_x2_over_2); - -#define SELECT_IF_NON_ZERO(x) { \ - mask = MASK_IF_NON_ZERO(remainder & (1 << shift++)); \ - result = SELECT_USING_MASK(mask, SAT_HIGH_MUL(result, x), result); \ -} - - SELECT_IF_NON_ZERO(1672461947) - SELECT_IF_NON_ZERO(1302514674) - SELECT_IF_NON_ZERO(790015084) - SELECT_IF_NON_ZERO(290630308) - SELECT_IF_NON_ZERO(39332535) - SELECT_IF_NON_ZERO(720401) - SELECT_IF_NON_ZERO(242) - -#undef SELECT_IF_NON_ZERO - - mask = MASK_IF_ZERO(val); - return SELECT_USING_MASK(mask, INT32_MAX, result); -} \ No newline at end of file diff --git a/code/components/esp-nn/test_app/CMakeLists.txt b/code/components/esp-nn/test_app/CMakeLists.txt deleted file mode 100644 index 8d332768..00000000 --- a/code/components/esp-nn/test_app/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -# The following lines of boilerplate have to be in your project's -# CMakeLists in this exact order for cmake to work correctly -cmake_minimum_required(VERSION 3.5) - -set(EXTRA_COMPONENT_DIRS "../" "../tests/") -set(IDF_EXCLUDE_COMPONENTS test test_app) - -include($ENV{IDF_PATH}/tools/cmake/project.cmake) -project(test_app) diff --git a/code/components/esp-nn/test_app/main/CMakeLists.txt b/code/components/esp-nn/test_app/main/CMakeLists.txt deleted file mode 100644 index 04161254..00000000 --- a/code/components/esp-nn/test_app/main/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -set(COMPONENT_SRCS "main.c") -set(COMPONENT_ADD_INCLUDEDIRS "") - -set(COMPONENT_PRIV_REQUIRES tests) - -register_component() diff --git a/code/components/esp-nn/test_app/main/component.mk b/code/components/esp-nn/test_app/main/component.mk deleted file mode 100644 index 5d85ad38..00000000 --- a/code/components/esp-nn/test_app/main/component.mk +++ /dev/null @@ -1,8 +0,0 @@ -# -# Main component makefile. -# -# This Makefile can be left empty. By default, it will take the sources in the -# src/ directory, compile them and link them into lib(subdirectory_name).a -# in the build directory. This behaviour is entirely configurable, -# please read the ESP-IDF documents if you need to do this. -# diff --git a/code/components/esp-nn/test_app/main/main.c b/code/components/esp-nn/test_app/main/main.c deleted file mode 100644 index 267e35f2..00000000 --- a/code/components/esp-nn/test_app/main/main.c +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include - -#include -#include -#include - -#include -#include - -static const char *TAG = "test_app"; -static uint32_t start_c, start_opt, total_c, total_opt; - -void profile_c_start() -{ - /* initiate profiling */ - start_c = esp_cpu_get_ccount(); -} - -void profile_c_end() -{ - /* record profile number */ - total_c = esp_cpu_get_ccount() - start_c; -} - -void profile_opt_start() -{ - /* initiate profiling */ - start_opt = esp_cpu_get_ccount(); -} - -void profile_opt_end() -{ - /* record profile number */ - total_opt = esp_cpu_get_ccount() - start_opt; -} - -void app_main() -{ - /* s8 tests */ - ESP_LOGI(TAG, "Running s8 tests..."); - esp_nn_add_elementwise_s8_test(); - printf("add, c %u opt %u\n", total_c, total_opt); - esp_nn_mul_elementwise_s8_test(); - printf("mul, c %u opt %u\n", total_c, total_opt); - esp_nn_depthwise_conv_s8_test(); - printf("depthwise, c %u opt %u\n", total_c, total_opt); - esp_nn_conv_s8_test(); - printf("conv2d, c %u opt %u\n", total_c, total_opt); - - esp_nn_relu6_s8_test(); - printf("relu, c %u opt %u\n", total_c, total_opt); - esp_nn_avg_pool_s8_test(); - printf("avg_pool, c %u opt %u\n", total_c, total_opt); - esp_nn_max_pool_s8_test(); - printf("max_pool, c %u opt %u\n", total_c, total_opt); - esp_nn_fully_connected_s8_test(); - printf("fully_connected, c %u opt %u\n", total_c, total_opt); - esp_nn_softmax_s8_test(); - printf("softmax, c %u opt %u\n", total_c, total_opt); - ESP_LOGI(TAG, "s8 tests done!\n"); - - /* u8 tests */ - //ESP_LOGI(TAG, "Running u8 tests..."); - //esp_nn_add_elementwise_u8_test(); - //esp_nn_depthwise_conv_u8_test(); - //esp_nn_conv_u8_test(); - //esp_nn_avg_pool_u8_test(); - //esp_nn_max_pool_u8_test(); - //esp_nn_fully_connected_u8_test(); - //ESP_LOGI(TAG, "u8 tests done!\n"); -} diff --git a/code/components/esp-nn/test_app/sdkconfig.defaults b/code/components/esp-nn/test_app/sdkconfig.defaults deleted file mode 100644 index bb37aac5..00000000 --- a/code/components/esp-nn/test_app/sdkconfig.defaults +++ /dev/null @@ -1,5 +0,0 @@ - -# -# esp-nn -# -CONFIG_NN_ESP32=y diff --git a/code/components/esp-nn/test_app/sdkconfig.defaults.esp32s3 b/code/components/esp-nn/test_app/sdkconfig.defaults.esp32s3 deleted file mode 100644 index 1adc4b01..00000000 --- a/code/components/esp-nn/test_app/sdkconfig.defaults.esp32s3 +++ /dev/null @@ -1,8 +0,0 @@ -# Default configurations for ESP32-S3 - -CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240=y -CONFIG_ESP32S3_SPIRAM_SUPPORT=y - -CONFIG_ESP32S3_DATA_CACHE_64KB=y -CONFIG_ESP32S3_DATA_CACHE_8WAYS=y -CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y diff --git a/code/components/esp-nn/tests/CMakeLists.txt b/code/components/esp-nn/tests/CMakeLists.txt deleted file mode 100644 index 97ec946f..00000000 --- a/code/components/esp-nn/tests/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ - -set(COMPONENT_ADD_INCLUDEDIRS ./include/) -set(COMPONENT_SRCS "src/basic_math_test.c" - "src/convolution_test.c" - "src/fully_connected_test.c" - "src/pooling_test.c" - "src/relu_test.c" - "src/softmax_test.c") - -set(COMPONENT_REQUIRES ) -set(COMPONENT_PRIV_REQUIRES esp-nn) - -register_component() - -target_compile_options(${COMPONENT_LIB} PRIVATE -Wno-unused-function) diff --git a/code/components/esp-nn/tests/README.md b/code/components/esp-nn/tests/README.md deleted file mode 100644 index 41c94235..00000000 --- a/code/components/esp-nn/tests/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Tests for esp_nn library - -- Include these in your test framework and run the framework. -- For IDF test please refer `test_app` diff --git a/code/components/esp-nn/tests/component.mk b/code/components/esp-nn/tests/component.mk deleted file mode 100644 index 2860f3ff..00000000 --- a/code/components/esp-nn/tests/component.mk +++ /dev/null @@ -1,5 +0,0 @@ -#FIXME - -COMPONENT_ADD_INCLUDEDIRS := include/ - -COMPONENT_SRCDIRS := src/ diff --git a/code/components/esp-nn/tests/include/test_functions.h b/code/components/esp-nn/tests/include/test_functions.h deleted file mode 100644 index 3e882efa..00000000 --- a/code/components/esp-nn/tests/include/test_functions.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -/* int8_t ops tests */ -void esp_nn_add_elementwise_s8_test(); -void esp_nn_mul_elementwise_s8_test(); - -void esp_nn_depthwise_conv_s8_test(); -void esp_nn_conv_s8_test(); - -void esp_nn_avg_pool_s8_test(); -void esp_nn_max_pool_s8_test(); - -void esp_nn_fully_connected_s8_test(); - -void esp_nn_relu6_s8_test(); - -void esp_nn_softmax_s8_test(); - -/* uint8_t ops tests */ -void esp_nn_add_elementwise_u8_test(); - -void esp_nn_depthwise_conv_u8_test(); -void esp_nn_conv_u8_test(); - -void esp_nn_avg_pool_u8_test(); -void esp_nn_max_pool_u8_test(); - -void esp_nn_fully_connected_u8_test(); - -/* instructions test functions */ -void compare_instructions_test(); -void arith_instructions_test(); -void min_max_instructions_test(); -void bitwise_instructions_test(); -void load_store_instructions_test(); diff --git a/code/components/esp-nn/tests/include/test_utils.h b/code/components/esp-nn/tests/include/test_utils.h deleted file mode 100644 index a152549b..00000000 --- a/code/components/esp-nn/tests/include/test_utils.h +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include - -/* mult value range */ -#define MULT_MAX INT32_MAX -#define MULT_MIN 0 - -/* shift value range */ -#define SHIFT_MIN -31 -#define SHIFT_MAX 30 - -/** - * @brief callback function to run before C function - */ -void profile_c_start(); - -/** - * @brief callback function to run after C function - */ -void profile_c_end(); - -/** - * @brief callback function to run before optimized function - */ -void profile_opt_start(); - -/** - * @brief callback function to run after optimized function - */ -void profile_opt_end(); - -#define ANSI_COLOR_RED "\x1b[31m" -#define ANSI_COLOR_GREEN "\x1b[32m" -#define ANSI_COLOR_YELLOW "\x1b[33m" -#define ANSI_COLOR_BLUE "\x1b[34m" -#define ANSI_COLOR_MAGENTA "\x1b[35m" -#define ANSI_COLOR_CYAN "\x1b[36m" -#define ANSI_COLOR_RESET "\x1b[0m" - -#define CHECK_EQUAL(ARRAY1, ARRAY2, size) ({ \ - bool res = true; \ - for (int _i = 0; _i < size; _i++) { \ - if (ARRAY1[_i] != ARRAY2[_i]) { \ - res = false; \ - break; \ - } \ - } \ - res; \ -}) - -#define PRINT_ARRAY_INT(ARRAY, width, height) ({ \ - int *_array = (int *) ARRAY; \ - for (int _j = 0; _j < height; _j++) { \ - for (int _i = 0; _i < width; _i++) { \ - printf("%d\t", _array[width * _j + _i]); \ - } \ - printf("\n"); \ - } \ - printf("\n"); \ -}) - -#define PRINT_ARRAY_HEX(ARRAY, width, height) ({ \ - uint8_t *_array = (uint8_t *) ARRAY; \ - for (int _j = 0; _j < height; _j++) { \ - for (int _i = 0; _i < width; _i++) { \ - printf("%02x\t", _array[width * _j + _i]); \ - } \ - printf("\n"); \ - } \ - printf("\n"); \ -}) diff --git a/code/components/esp-nn/tests/src/basic_math_test.c b/code/components/esp-nn/tests/src/basic_math_test.c deleted file mode 100644 index 715d7c78..00000000 --- a/code/components/esp-nn/tests/src/basic_math_test.c +++ /dev/null @@ -1,355 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - -#include -#include -#include "test_utils.h" - -#if CONFIG_IDF_CMAKE -#if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)) -#define IDF_HEAP_CAPS 1 -#endif - -#if IDF_HEAP_CAPS -#include "esp_heap_caps.h" -#endif -#endif - -void esp_nn_add_elementwise_s8_test() -{ - /* prepare data */ - const int size = 1600 + 8 + 7; /* odd len to test leftover */ - int8_t *input1; - int8_t *input2; - int8_t *out_data_c; - int8_t *out_data_opt; - int8_t *input1_orig = NULL; - int8_t *input2_orig = NULL; - int8_t *out_c_orig = NULL; - int8_t *out_opt_orig = NULL; - int32_t input1_offset = 34; - int32_t input2_offset = 35; - int32_t output_offset = 36; - int32_t input1_shift = -8; // right_shift amt always <= 0 - int32_t input2_shift = -8; // right_shift amt always <= 0 - int32_t output_shift = -9; // right_shift amt always <= 0 - int32_t left_shift = 15; // always +ve - int32_t input1_mult = INT32_MAX; - int32_t input2_mult = INT32_MAX; - int32_t output_mult = INT32_MAX; - int32_t activation_min = -128; - int32_t activation_max = 127; - - for (int itr = 0; itr < 10; itr++) { - switch (itr) { - case 0: // all zeros - input1_offset = 0; - input2_offset = 0; - output_offset = 0; - input1_mult = 0; - input2_mult = 0; - output_mult = 0; - input1_shift = 0; - input2_shift = 0; - output_shift = 0; - left_shift = 0; - break; - case 1: // hit min - input1_offset = -127; - input2_offset = -127; - output_offset = -128; - input1_mult = MULT_MIN; - input2_mult = MULT_MIN; - output_mult = MULT_MIN; - input1_shift = 0; - input2_shift = 0; - output_shift = 0; - left_shift = 0; - break; - case 2: // hit max - input1_offset = 128; - input2_offset = 128; - output_offset = -127; - input1_mult = MULT_MAX; - input2_mult = MULT_MAX; - output_mult = MULT_MAX; - input1_shift = SHIFT_MIN; - input2_shift = SHIFT_MIN; - output_shift = SHIFT_MIN; - left_shift = 30 - 8; // since input is 8 bits - break; - case 3: // hit extreme max - input1_offset = 128; - input2_offset = 128; - output_offset = -127; - input1_mult = MULT_MAX; - input2_mult = MULT_MAX; - output_mult = MULT_MAX; - input1_shift = 0; - input2_shift = 0; - output_shift = 0; - left_shift = 30 - 8; // -8 since input is 8 bit - break; - default: // practical random input - input1_offset = rand() % 256 - 127; // range [-127, 128] - input2_offset = rand() % 256 - 127; // range [-127, 128] - output_offset = rand() % 256 - 128; // range [-128, 127] - input1_mult = MULT_MAX / 2 + rand() % INT16_MAX; - input2_mult = MULT_MAX / 2 + rand() % INT16_MAX; - output_mult = MULT_MAX / 2 + rand() % INT16_MAX; - input1_shift = -8 + rand() % 4; - input2_shift = -8 + rand() % 4; - output_shift = -8 + rand() % 4; - left_shift = rand() % 15; - } -#if IDF_HEAP_CAPS - input1_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - input2_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - out_c_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - out_opt_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - - input1 = 16 + input1_orig - ((uint32_t) input1_orig & 0xf); - input2 = 16 + input2_orig - ((uint32_t) input2_orig & 0xf); - out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf); - out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf); -#else - input1 = memalign(16, size); - input2 = memalign(16, size); - out_data_c = memalign(16, size); - out_data_opt = memalign(16, size); - - input1_orig = input1; - input2_orig = input2; - out_c_orig = out_data_c; - out_opt_orig = out_data_opt; -#endif - if (input1_orig == NULL || input2_orig == NULL || out_c_orig == NULL || - out_opt_orig == NULL) { - printf(ANSI_COLOR_RED"%s error allocating buffers\n"ANSI_COLOR_RESET, __FUNCTION__); - goto elementwise_add_test_cleanup; - } - - for (int i = 0; i < size; ++i) { - input1[i] = rand() % 256 - 128; - input2[i] = rand() % 256 - 128; - } - - if (itr == 0) { - /* enable profiler */ - profile_c_start(); - } - /* C function */ - esp_nn_add_elementwise_s8_ansi(input1, input2, input1_offset, input2_offset, - input1_mult, input2_mult, input1_shift, input2_shift, - left_shift, out_data_c, output_offset, output_mult, - output_shift, activation_min, activation_max, size); - - if (itr == 0) { - profile_c_end(); - profile_opt_start(); - } - - /* Optimized function */ - esp_nn_add_elementwise_s8(input1, input2, input1_offset, input2_offset, - input1_mult, input2_mult, input1_shift, input2_shift, - left_shift, out_data_opt, output_offset, output_mult, - output_shift, activation_min, activation_max, size); - if (itr == 0) { - /* disable profiler */ - profile_opt_end(); - } - - bool ret = CHECK_EQUAL(out_data_c, out_data_opt, size); - if (ret == false) { - printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - printf("Output: \n"); - PRINT_ARRAY_HEX(out_data_opt, size, 1); - printf("Expected: \n"); - PRINT_ARRAY_HEX(out_data_c, size, 1); - printf("Input1:\n"); - PRINT_ARRAY_HEX(input1, size, 1); - printf("Input2:\n"); - PRINT_ARRAY_HEX(input2, size, 1); - printf("in1_shift %d, in2_shift %d, left_shift %d, out_shift %d\n", - input1_shift, input2_shift, left_shift, output_shift); - printf("in1_mult %d, in2_mult %d, out_mult %d\n", input1_mult, input2_mult, output_mult); - goto elementwise_add_test_cleanup; - } - printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - -elementwise_add_test_cleanup: - if (input1_orig) { - free(input1_orig); - } - if (input2_orig) { - free(input2_orig); - } - if (out_c_orig) { - free(out_c_orig); - } - if (out_opt_orig) { - free(out_opt_orig); - } - } -} - -void esp_nn_mul_elementwise_s8_test() -{ - /* prepare data */ - const int size = 1600 + 8 + 7; /* odd len to test leftover */ - int8_t *input1; - int8_t *input2; - int8_t *out_data_c; - int8_t *out_data_opt; - int32_t input1_offset = 34; - int32_t input2_offset = 35; - int32_t output_offset = 36; - int32_t output_shift = -7; - int32_t output_mult = MULT_MAX; // max out_mult - int32_t activation_min = -128; - int32_t activation_max = 127; - int8_t *input1_orig = NULL; - int8_t *input2_orig = NULL; - int8_t *out_c_orig = NULL; - int8_t *out_opt_orig = NULL; - - for (int itr = 0; itr < 10; itr++) { - switch (itr) { - case 0: // all zeros - input1_offset = 0; - input2_offset = 0; - output_offset = 0; - output_mult = 0; - output_shift = 0; - break; - case 1: // hit min - input1_offset = -127; - input2_offset = -127; - output_offset = -128; - output_mult = MULT_MIN; - output_shift = 0; - break; - case 2: // hit max - input1_offset = 128; - input2_offset = 128; - output_offset = -127; - output_mult = MULT_MAX; - output_shift = SHIFT_MIN; - break; - case 3: // hit extreme max - input1_offset = 128; - input2_offset = 128; - output_offset = -127; - output_mult = MULT_MAX; - output_shift = 0; - break; - default: // practical random input - input1_offset = rand() % 256 - 127; // range [-127, 128] - input2_offset = rand() % 256 - 127; // range [-127, 128] - output_offset = rand() % 256 - 128; // range [-128, 127] - output_mult = MULT_MAX / 2 + rand() % INT16_MAX; - output_shift = -8 + rand() % 4; - } - -#if IDF_HEAP_CAPS - input1_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - input2_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - out_c_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - out_opt_orig = (int8_t *) heap_caps_malloc(size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - - input1 = 16 + input1_orig - ((uint32_t) input1_orig & 0xf); - input2 = 16 + input2_orig - ((uint32_t) input2_orig & 0xf); - out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf); - out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf); -#else - input1 = memalign(16, size); - input2 = memalign(16, size); - out_data_c = memalign(16, size); - out_data_opt = memalign(16, size); - - input1_orig = input1; - input2_orig = input2; - out_c_orig = out_data_c; - out_opt_orig = out_data_opt; -#endif - if (input1_orig == NULL || input2_orig == NULL || out_c_orig == NULL || - out_opt_orig == NULL) { - printf(ANSI_COLOR_RED"%s error allocating buffers\n"ANSI_COLOR_RESET, __FUNCTION__); - goto elementwise_mult_test_cleanup; - } - - for (int i = 0; i < size; ++i) { - input1[i] = rand() % 256 - 128; - input2[i] = rand() % 256 - 128; - } - - if (itr == 0) { - /* enable profiler */ - profile_c_start(); - } - /* C function */ - esp_nn_mul_elementwise_s8_ansi(input1, input2, input1_offset, input2_offset, - out_data_c, output_offset, output_mult, output_shift, - activation_min, activation_max, size); - - if (itr == 0) { - profile_c_end(); - profile_opt_start(); - } - /* Optimized function */ - esp_nn_mul_elementwise_s8(input1, input2, input1_offset, input2_offset, - out_data_opt, output_offset, output_mult, output_shift, - activation_min, activation_max, size); - - if (itr == 0) { - /* disable profiler */ - profile_opt_end(); - } - - bool ret = CHECK_EQUAL(out_data_c, out_data_opt, size); - if (ret == false) { - printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - printf("Output: \n"); - PRINT_ARRAY_HEX(out_data_opt, size, 1); - printf("Expected: \n"); - PRINT_ARRAY_HEX(out_data_c, size, 1); - printf("Input1:\n"); - PRINT_ARRAY_HEX(input1, size, 1); - printf("Input2:\n"); - PRINT_ARRAY_HEX(input2, size, 1); - goto elementwise_mult_test_cleanup; - } - printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - -elementwise_mult_test_cleanup: - if (input1_orig) { - free(input1_orig); - } - if (input2_orig) { - free(input2_orig); - } - if (out_c_orig) { - free(out_c_orig); - } - if (out_opt_orig) { - free(out_opt_orig); - } - } -} diff --git a/code/components/esp-nn/tests/src/convolution_test.c b/code/components/esp-nn/tests/src/convolution_test.c deleted file mode 100644 index c86bdbab..00000000 --- a/code/components/esp-nn/tests/src/convolution_test.c +++ /dev/null @@ -1,605 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - -#include -#include "test_utils.h" - -#if CONFIG_IDF_CMAKE -#if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)) -#define IDF_HEAP_CAPS 1 -#endif -#if IDF_HEAP_CAPS -#include "esp_heap_caps.h" -#endif -#endif - -void esp_nn_depthwise_conv_s8_test() -{ - int8_t *input = NULL, *filter_data = NULL, *out_data_c = NULL, *out_data_opt = NULL; - int32_t *bias = NULL; - int32_t input_offset = 5; /* some number in [-128, 127] */ - int32_t out_offset = 7; - int32_t activation_min = -125; - int32_t activation_max = 120; - void *scratch_buf = NULL; - - /* independent variables */ - int input_wd, input_ht, channels; - uint16_t filter_ht, filter_wd, ch_mult; - uint16_t pad_wd, pad_ht, stride_wd, stride_ht; - - // run for 15 iterations - for (int itr = 0; itr < 15; itr++) { - /* prepare data */ - switch (itr) { - case 0: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) - input_wd = 18; - input_ht = 18; - filter_ht = 3; - filter_wd = 3; - ch_mult = 1; - channels = 16; - pad_wd = 0; - pad_ht = 0; - stride_wd = 1; - stride_ht = 1; - break; - case 1: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (1,1) - input_wd = 10; - input_ht = 10; - filter_ht = 3; - filter_wd = 3; - ch_mult = 1; - channels = 16; - pad_wd = 1; - pad_ht = 1; - stride_wd = 1; - stride_ht = 1; - break; - case 2: // (ch_mult 1, (channels % 8) = 0), filter (3,3), pad (1,1) - input_wd = 10; - input_ht = 10; - filter_ht = 3; - filter_wd = 3; - ch_mult = 1; - channels = 24; - pad_wd = 1; - pad_ht = 1; - stride_wd = 1; - stride_ht = 1; - break; - case 3: // other filter sizes (ch_mult 1, (channels % 8) = 0) - input_wd = 10; - input_ht = 10; - filter_ht = 3; - filter_wd = 3; - ch_mult = 1; - channels = 24; - pad_wd = 1; - pad_ht = 1; - stride_wd = 1; - stride_ht = 1; - break; - case 4: // other filter sizes (ch_mult 8 = 0) - input_wd = 6; - input_ht = 6; - filter_ht = 3; - filter_wd = 3; - ch_mult = 8; - channels = 4; - pad_wd = 1; - pad_ht = 1; - stride_wd = 1; - stride_ht = 1; - break; - case 5: // other filter sizes (ch_mult 8 = 0) - input_wd = 12; - input_ht = 12; - filter_ht = 5; - filter_wd = 5; - ch_mult = 8; - channels = 4; - pad_wd = 1; - pad_ht = 1; - stride_wd = 1; - stride_ht = 1; - break; - case 6: // other filter sizes (ch_mult 4 = 0) - input_wd = 6; - input_ht = 6; - filter_ht = 3; - filter_wd = 3; - ch_mult = 4; - channels = 4; - pad_wd = 1; - pad_ht = 1; - stride_wd = 1; - stride_ht = 1; - break; - case 7: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2) - input_wd = 6; - input_ht = 6; - filter_ht = 3; - filter_wd = 3; - ch_mult = 1; - channels = 16; - pad_wd = 0; - pad_ht = 0; - stride_wd = 2; - stride_ht = 2; - break; - case 8: // same as case 7, with large parameters - input_wd = 58; - input_ht = 58; - filter_ht = 3; - filter_wd = 3; - ch_mult = 1; - channels = 128; - pad_wd = 0; - pad_ht = 0; - stride_wd = 2; - stride_ht = 2; - break; - case 9: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2) - input_wd = 6; - input_ht = 6; - filter_ht = 3; - filter_wd = 3; - ch_mult = 1; - channels = 16; - pad_wd = 0; - pad_ht = 0; - stride_wd = 2; - stride_ht = 2; - break; - default: - input_wd = 6; - input_ht = 6; - filter_ht = 3; - filter_wd = 3; - ch_mult = 1; - channels = 16; - stride_wd = rand() % 2 + 1; - stride_ht = stride_wd; - pad_wd = stride_wd == 1 ? 0 : rand() % 2; - pad_ht = pad_wd; - printf("stride(%d), pad (%d)\t", stride_wd, pad_wd); - break; - } - - uint16_t out_wd = (input_wd - filter_wd + 1) / stride_wd; - uint16_t out_ht = (input_ht - filter_ht + 1) / stride_ht; - if (itr == 9) { - // expect the function to handle this gracefully - out_wd += 1; - out_ht += 1; - } - int in_size = input_wd * input_ht * channels; - int out_size = out_wd * out_ht * channels * ch_mult; - int filter_size = filter_wd * filter_ht * channels * ch_mult + 4; - int bias_size = channels * ch_mult + 1; - int32_t out_shift[channels * ch_mult]; - int32_t out_mult[channels * ch_mult]; - -#if IDF_HEAP_CAPS - int8_t *input_orig = (int8_t *) heap_caps_malloc(in_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - int8_t *out_c_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - int8_t *out_opt_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - filter_data = (int8_t *) heap_caps_malloc(filter_size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - bias = (int32_t *) heap_caps_malloc(bias_size * 4, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - - input = 16 + input_orig - ((uint32_t) input_orig & 0xf); - out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf); - out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf); -#else - input = memalign(16, in_size + 16); - filter_data = memalign(16, filter_size); - out_data_c = memalign(16, out_size + 16); - out_data_opt = memalign(16, out_size + 16); - bias = memalign(16, bias_size * 4); - int8_t *input_orig = input; - int8_t *out_c_orig = out_data_c; - int8_t *out_opt_orig = out_data_opt; -#endif - if (bias == NULL || input == NULL || filter_data == NULL || - out_data_c == NULL || out_data_opt == NULL || bias == NULL) { - printf(ANSI_COLOR_RED"%s[%d] allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - goto dc_s8_cleanup; - } - - /* Generate input data */ - for (int i = 0; i < in_size; ++i) { - input[i] = rand() % 128; - } - - /* Generate filter data */ - for (int i = 0; i < filter_size; ++i) { - filter_data[i] = rand() % 256 - 128; - } - - /* Generate bias data */ - for (int i = 0; i < channels * ch_mult; ++i) { - bias[i + 1] = rand() % INT16_MAX; //0th index left for unalignment - out_shift[i] = -8 + rand() % 3; - out_mult[i] = 0x7eb0e200 + rand() % 50; - } - - data_dims_t input_dims = {.width = input_wd, .height = input_ht, .channels = channels, 1}; - data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = channels * ch_mult, 1}; - data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0}; - dw_conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset, .ch_mult = ch_mult, - .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht}, - .dilation = {0, 0}, .activation = {activation_min, activation_max}}; - quant_data_t quant_data = {.shift = out_shift, .mult = out_mult}; - - int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size(&input_dims, &filter_dims, - &output_dims, &conv_params); - if (scratch_buf_size > 0) { -#if IDF_HEAP_CAPS - scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - int align_sz = 16 - (((int32_t) scratch_buf) & 0xf); -#else - scratch_buf = memalign(16, scratch_buf_size); - int align_sz = 0; -#endif - if (scratch_buf == NULL) { - printf(ANSI_COLOR_RED"%s[%d] scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET, - __FUNCTION__, itr, scratch_buf_size); - goto dc_s8_cleanup; - } - esp_nn_set_depthwise_conv_scratch_buf(scratch_buf + align_sz); - } - if (itr == 0) { - /* enable profiler */ - profile_c_start(); - } - - /* C function */ - esp_nn_depthwise_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 4, - bias + 1, &output_dims, out_data_c, &conv_params, &quant_data); - - if (itr == 0) { - profile_c_end(); - profile_opt_start(); - } - - /* Optimized function */ - esp_nn_depthwise_conv_s8(&input_dims, input, &filter_dims, filter_data + 4, - bias + 1, &output_dims, out_data_opt, &conv_params, &quant_data); - - if (itr == 0) { - /* disable profiler */ - profile_opt_end(); - } - - bool ret = CHECK_EQUAL(out_data_c, out_data_opt, out_size); - if (ret == false) { - printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - printf("Output: \n"); - PRINT_ARRAY_HEX(out_data_opt, out_size / out_ht, out_ht); - printf("Expected: \n"); - PRINT_ARRAY_HEX(out_data_c, out_size / out_ht, out_ht); - printf("Input:\n"); - PRINT_ARRAY_HEX(input, in_size / input_ht, input_ht); - printf("Filter data:\n"); - PRINT_ARRAY_HEX(filter_data + 4, (filter_size - 4) / filter_ht, filter_ht); - printf("bias data:\n"); - PRINT_ARRAY_INT(bias + 1, ch_mult * channels, 1); - goto dc_s8_cleanup; - } - printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - - dc_s8_cleanup: - if (input) { - free(input_orig); - } - if (filter_data) { - free(filter_data); - } - if (out_data_c) { - free(out_c_orig); - } - if (out_data_opt) { - free(out_opt_orig); - } - if (bias) { - free(bias); - } - if (scratch_buf) { - free(scratch_buf); - } - } -} - -void esp_nn_conv_s8_test() -{ - const int32_t input_offset = 5; /* some number in [-128, 127] */ - const int32_t activation_min = -125; - const int32_t activation_max = 122; - const int32_t out_offset = 3; - - void *scratch_buf = NULL; - int8_t *input_orig; - int8_t *out_c_orig; - int8_t *out_opt_orig; - int8_t *filter_data; - int32_t *bias; - - /* independent variable */ - int in_wd, in_ht, in_channels, out_channels; - uint16_t filter_ht, filter_wd; - uint16_t pad_wd, pad_ht, stride_wd, stride_ht; - - // run for 10 iterations - for (int itr = 0; itr < 10; itr++) { - switch (itr) { - case 0: // ch % 8 == 0 && filter (1,1), padding (0,0) - in_wd = 10; - in_ht = 10; - in_channels = 64; - out_channels = 64; - filter_ht = 1; - filter_wd = 1; - pad_wd = 0; - pad_ht = 0; - stride_wd = 1; - stride_ht = 1; - break; - case 1: // ch % 4 == 0 && (in_wd * in_ht) % 16 == 0 - in_wd = 4; - in_ht = 4; - in_channels = 20; - out_channels = 8; - filter_ht = 1; - filter_wd = 1; - pad_wd = 0; - pad_ht = 0; - stride_wd = 1; - stride_ht = 1; - break; - case 2: // ch, filter (3x3x3) - in_wd = 10; - in_ht = 10; - in_channels = 3; - out_channels = 64; - filter_ht = 3; - filter_wd = 3; - pad_wd = 0; - pad_ht = 0; - stride_wd = 1; - stride_ht = 1; - break; - case 3: // remaining pad (0, 0) - in_wd = 10; - in_ht = 10; - in_channels = 3; - out_channels = 64; - filter_ht = 1; - filter_wd = 1; - pad_wd = 0; - pad_ht = 0; - stride_wd = 1; - stride_ht = 1; - break; - case 4: // unopt case - in_wd = 10; - in_ht = 10; - in_channels = 12; - out_channels = 64; - filter_ht = 3; - filter_wd = 3; - pad_wd = 1; - pad_ht = 1; - stride_wd = 1; - stride_ht = 1; - break; - case 5: // ch % 8 == 0 & stride (2,2) - in_wd = 16; - in_ht = 16; - in_channels = 16; - out_channels = 16; - filter_ht = 1; - filter_wd = 1; - pad_wd = 0; - pad_ht = 0; - stride_wd = 2; - stride_ht = 2; - break; - case 6: // ch % 8 == 0 && filter (1,1), padding (0,0) - in_wd = 2; - in_ht = 2; - in_channels = 8; - out_channels = 8; - filter_ht = 1; - filter_wd = 1; - pad_wd = 0; - pad_ht = 0; - stride_wd = 1; - stride_ht = 1; - break; - default: // ch % 8 == 0 - in_wd = 8; - in_ht = 8; - in_channels = 16; - out_channels = 16; - filter_ht = 1; - filter_wd = 1; - pad_wd = 0; - pad_ht = 0; - stride_wd = 1; - stride_ht = 1; - break; - } - - /* prepare data */ - uint16_t out_wd = (in_wd - filter_wd + 1) / stride_wd; - uint16_t out_ht = (in_ht - filter_ht + 1) / stride_ht; - - int in_size = in_wd * in_ht * in_channels; - int filter_size = filter_wd * filter_ht * in_channels * out_channels + 2; - int out_size = out_wd * out_ht * out_channels; - -#if IDF_HEAP_CAPS - input_orig = (int8_t *) heap_caps_malloc(in_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - out_c_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - out_opt_orig = (int8_t *) heap_caps_malloc(out_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - filter_data = (int8_t *) heap_caps_malloc(filter_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - bias = (int32_t *) heap_caps_malloc(128 + sizeof (int32_t) * out_channels, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - - int8_t *input = 16 + input_orig - ((uint32_t) input_orig & 0xf); - int8_t *out_data_c = 16 + out_c_orig - ((uint32_t) out_c_orig & 0xf); - int8_t *out_data_opt = 16 + out_opt_orig - ((uint32_t) out_opt_orig & 0xf); -#else - int8_t *input = memalign(16, in_size); - int8_t *out_data_c = memalign(16, out_size); - int8_t *out_data_opt = memalign(16, out_size); - filter_data = memalign(16, filter_size); - bias = calloc(1, 128 + sizeof (int32_t) * out_channels); - input_orig = input; - out_c_orig = out_data_c; - out_opt_orig = out_data_opt; -#endif - int32_t *out_shift = calloc(1, 128 + sizeof (int32_t) * out_channels); - int32_t *out_mult = calloc(1, 128 + sizeof (int32_t) * out_channels); - - if (input == NULL || filter_data == NULL || - out_data_c == NULL || out_data_opt == NULL) { - printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__); - goto conv_s8_cleanup; - } - - if (bias == NULL || out_shift == NULL || out_mult == NULL) { - printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__); - goto conv_s8_cleanup; - } - - /* Generate input data between -128 -> +127 */ - for (int i = 0; i < in_size; ++i) { - input[i] = rand() % 255 - 128; - } - - /* Generate filter data between -128 -> +127 */ - for (int i = 0; i < filter_size; ++i) { - filter_data[i] = rand() % 256 - 128; - } - - /* Generate bias data */ - for (int i = 0; i < out_channels; ++i) { - bias[i] = (int32_t)rand() % UINT16_MAX + UINT8_MAX; - } - - /* Shift and multiplier */ - for (int i = 0; i < out_channels; ++i) { - out_shift[i] = -10 + rand() % 2; - out_mult[i] = 0x7f67f4f8 + rand() % 50; - } - - data_dims_t input_dims = {.width = in_wd, .height = in_ht, .channels = in_channels, 1}; - data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = out_channels, 1}; - data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0}; - conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset, - .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht}, - .dilation = {0, 0}, .activation = {activation_min, activation_max}}; - quant_data_t quant_data = {.shift = out_shift, .mult = out_mult}; - - int scratch_buf_size = esp_nn_get_conv_scratch_size(&input_dims, &filter_dims, - &output_dims, &conv_params); - if (scratch_buf_size > 0) { -#if IDF_HEAP_CAPS - void *scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); - int align_sz = 16 - (((int32_t) scratch_buf) & 0xf); -#else - void *scratch_buf = memalign(16, scratch_buf_size); - int align_sz = 0; -#endif - if (scratch_buf == NULL) { - printf(ANSI_COLOR_RED"%s scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET, __FUNCTION__, scratch_buf_size); - goto conv_s8_cleanup; - } - esp_nn_set_conv_scratch_buf(scratch_buf + align_sz); - } - - if (itr == 0) { - /* enable profiler */ - profile_c_start(); - } - - /* C function */ - esp_nn_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 2, - bias, &output_dims, out_data_c, &conv_params, &quant_data); - - if (itr == 0) { - profile_c_end(); - profile_opt_start(); - } - - /* Optimized function */ - esp_nn_conv_s8(&input_dims, input, &filter_dims, filter_data + 2, - bias, &output_dims, out_data_opt, &conv_params, &quant_data); - - if (itr == 0) { - /* disable profiler */ - profile_opt_end(); - } - - bool ret = CHECK_EQUAL(out_data_c, out_data_opt, out_size); - if (ret == false) { - printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - printf("Output: \n"); - PRINT_ARRAY_HEX(out_data_opt, out_size / out_ht, out_ht); - printf("Expected: \n"); - PRINT_ARRAY_HEX(out_data_c, out_size / out_ht, out_ht); - printf("Input:\n"); - PRINT_ARRAY_HEX(input, in_size / in_ht, in_ht); - printf("Filter data:\n"); - PRINT_ARRAY_HEX(filter_data + 2, (filter_size - 2) / filter_ht, filter_ht); - printf("bias data:\n"); - PRINT_ARRAY_INT(bias, out_channels, 1); - goto conv_s8_cleanup; - } - printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - - conv_s8_cleanup: - if (input) { - free(input_orig); - } - if (filter_data) { - free(filter_data); - } - if (out_data_c) { - free(out_c_orig); - } - if (out_data_opt) { - free(out_opt_orig); - } - if (bias) { - free(bias); - } - if (out_shift) { - free(out_shift); - } - if (out_mult) { - free(out_mult); - } - if (scratch_buf) { - free(scratch_buf); - } - } -} diff --git a/code/components/esp-nn/tests/src/fully_connected_test.c b/code/components/esp-nn/tests/src/fully_connected_test.c deleted file mode 100644 index d0210b46..00000000 --- a/code/components/esp-nn/tests/src/fully_connected_test.c +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include - -#include -#include "test_utils.h" - - -void esp_nn_fully_connected_s8_test() -{ - /* prepare data */ - static uint16_t row_len = 256 + 8 + 7; /* odd len to test unaligned+left-over */ - static uint16_t out_channels = 3; - int8_t input[row_len]; - int8_t filter_data[row_len * out_channels]; - int8_t output_c[out_channels], output_opt[out_channels]; - static int32_t activation_min = -128; - static int32_t activation_max = 127; - static int32_t input_offset = 0; - static int32_t filter_offset = 0; - int32_t out_shift = -10; - static int32_t out_offset = 127; - int32_t out_mult = 0x59e492c4; - for (int itr = 0; itr < 5; itr++) { - out_mult = INT32_MAX / row_len + rand() % INT16_MAX; - switch (itr) { - case 0: - out_shift = -10; - break; - case 1: - out_shift = SHIFT_MIN; - break; - case 2: - out_shift = SHIFT_MAX; - break; - case 3: - out_shift = 0; - break; - default: - out_shift = -10 + rand() % 5; - break; - } - if (itr == 0) { - out_shift = SHIFT_MAX; - } - /* Generate input and filter data */ - for (int i = 0; i < row_len; ++i) { - input[i] = rand() % 256 - 128; - } - for (int i = 0; i < row_len * out_channels; ++i) { - filter_data[i] = rand() % 256 - 128; - } - - if (itr == 0) { - /* enable profiler */ - profile_c_start(); - } - - /* C function */ - esp_nn_fully_connected_s8_ansi(input, input_offset, row_len, filter_data, filter_offset, - NULL, output_c, out_channels, out_offset, out_shift, out_mult, - activation_min, activation_max); - - if (itr == 0) { - profile_c_end(); - profile_opt_start(); - } - - /* Optimized function */ - esp_nn_fully_connected_s8(input, input_offset, row_len, filter_data, filter_offset, - NULL, output_opt, out_channels, out_offset, out_shift, out_mult, - activation_min, activation_max); - - if (itr == 0) { - /* disable profiler */ - profile_opt_end(); - } - - bool ret = CHECK_EQUAL(output_c, output_opt, out_channels); - if (ret == false) { - printf(ANSI_COLOR_RED"%s[%d] failed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - printf("Output: \n"); - PRINT_ARRAY_HEX(output_opt, out_channels, 1); - printf("Expected: \n"); - PRINT_ARRAY_HEX(output_c, out_channels, 1); - printf("Input:\n"); - PRINT_ARRAY_HEX(input, row_len, 1); - printf("Filter data:\n"); - PRINT_ARRAY_HEX(filter_data, row_len, out_channels); - printf("Out shift: %d\n", out_shift); - printf("Out mult: %x\n", out_mult); - return; - } - printf(ANSI_COLOR_GREEN"%s[%d] passed\n"ANSI_COLOR_RESET, __FUNCTION__, itr); - } -} diff --git a/code/components/esp-nn/tests/src/pooling_test.c b/code/components/esp-nn/tests/src/pooling_test.c deleted file mode 100644 index c1c889e1..00000000 --- a/code/components/esp-nn/tests/src/pooling_test.c +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - -#include -#include "test_utils.h" - - -void esp_nn_avg_pool_s8_test() -{ - /* prepare data */ - const uint16_t input_wd = 16; - const uint16_t input_ht = 16; - const uint16_t channels = 16; /* With TFLite example, I have seen it 256 */ - const int size = input_wd * input_ht * channels; - int8_t *input, *output_c, *output_opt; - const int32_t activation_min = -128; - const int32_t activation_max = 127; - const uint16_t pad_wd = 1; - const uint16_t pad_ht = 1; - const uint16_t stride_wd = 1; - const uint16_t stride_ht = 1; - const uint16_t filter_ht = 3; - const uint16_t filter_wd = 3; - const uint16_t out_wd = input_wd / stride_wd; - const uint16_t out_ht = input_ht / stride_ht; - const int out_size = out_wd * out_ht * channels; - - input = memalign(16, size); - output_c = memalign(16, out_size); - output_opt = memalign(16, out_size); - - if (input == NULL || output_c == NULL || output_opt == NULL) { - printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__); - goto avg_pool_s8_cleanup; - } - /** - * width/height, channels etc look suspicious but it it true. - * It actually depends upon where in model this is actually placed. - * If at the end wd/ht tends to be smaller and depth larger. - */ - - for (int i = 0; i < size; ++i) { - input[i] = rand() % 256 - 128; - } - - /* enable profiler */ - profile_c_start(); - - /* C function */ - esp_nn_avg_pool_s8_ansi(input, input_wd, input_ht, output_c, out_wd, out_ht, - stride_wd, stride_ht, filter_wd, filter_ht, pad_wd, pad_ht, - activation_min, activation_max, channels); - - profile_c_end(); - profile_opt_start(); - - /* Optimized function */ - esp_nn_avg_pool_s8(input, input_wd, input_ht, output_opt, out_wd, out_ht, - stride_wd, stride_ht, filter_wd, filter_ht, pad_wd, pad_ht, - activation_min, activation_max, channels); - - /* disable profiler */ - profile_opt_end(); - - - bool ret = CHECK_EQUAL(output_c, output_opt, out_size); - if (ret == false) { - printf(ANSI_COLOR_RED"%s failed\n"ANSI_COLOR_RESET, __FUNCTION__); - printf("Output: \n"); - PRINT_ARRAY_HEX(output_opt, out_wd * channels, out_ht); - printf("Expected: \n"); - PRINT_ARRAY_HEX(output_c, out_wd * channels, out_ht); - printf("Input:\n"); - PRINT_ARRAY_HEX(input, input_wd * channels, input_ht); - goto avg_pool_s8_cleanup; - } - printf(ANSI_COLOR_GREEN"%s passed\n"ANSI_COLOR_RESET, __FUNCTION__); - -avg_pool_s8_cleanup: - if (input) { - free(input); - } - if (output_c) { - free(output_c); - } - if (output_opt) { - free(output_opt); - } -} - -void esp_nn_max_pool_s8_test() -{ - /* prepare data */ - const uint16_t input_wd = 16; - const uint16_t input_ht = 16; - const uint16_t channels = 16; /* With TFLite example, I have seen it 256 */ - int8_t *input, *output_c, *output_opt; - const int size = input_wd * input_ht * channels; - const int32_t activation_min = -128; - const int32_t activation_max = 127; - const uint16_t pad_wd = 1; - const uint16_t pad_ht = 1; - const uint16_t stride_wd = 1; - const uint16_t stride_ht = 1; - const uint16_t filter_ht = 3; - const uint16_t filter_wd = 3; - const uint16_t out_wd = input_wd / stride_wd; - const uint16_t out_ht = input_ht / stride_ht; - const int out_size = out_wd * out_ht * channels; - - input = memalign(16, size); - output_c = memalign(16, out_size); - output_opt = memalign(16, out_size); - - if (input == NULL || output_c == NULL || output_opt == NULL) { - printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__); - goto max_pool_s8_cleanup; - } - - for (int i = 0; i < size; ++i) { - input[i] = rand() % 256 - 128; - } - - /* enable profiler */ - profile_c_start(); - - /* C function */ - esp_nn_max_pool_s8_ansi(input, input_wd, input_ht, output_c, out_wd, out_ht, - stride_wd, stride_ht, filter_wd, filter_ht, pad_wd, pad_ht, - activation_min, activation_max, channels); - - profile_c_end(); - profile_opt_start(); - - /* Optimized function */ - esp_nn_max_pool_s8(input, input_wd, input_ht, output_opt, out_wd, out_ht, - stride_wd, stride_ht, filter_wd, filter_ht, pad_wd, pad_ht, - activation_min, activation_max, channels); - - /* disable profiler */ - profile_opt_end(); - - - bool ret = CHECK_EQUAL(output_c, output_opt, out_wd * out_ht * channels); - if (ret == false) { - printf(ANSI_COLOR_RED"%s failed\n"ANSI_COLOR_RESET, __FUNCTION__); - printf("Output: \n"); - PRINT_ARRAY_HEX(output_opt, out_wd * out_ht * channels, 1); - printf("Expected: \n"); - PRINT_ARRAY_HEX(output_c, out_wd * out_ht * channels, 1); - printf("Input:\n"); - PRINT_ARRAY_HEX(input, 8, size / 8); - goto max_pool_s8_cleanup; - } - printf(ANSI_COLOR_GREEN"%s passed\n"ANSI_COLOR_RESET, __FUNCTION__); - -max_pool_s8_cleanup: - if (input) { - free(input); - } - if (output_c) { - free(output_c); - } - if (output_opt) { - free(output_opt); - } -} diff --git a/code/components/esp-nn/tests/src/relu_test.c b/code/components/esp-nn/tests/src/relu_test.c deleted file mode 100644 index ce6f13f1..00000000 --- a/code/components/esp-nn/tests/src/relu_test.c +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - -#include -#include "test_utils.h" - -void esp_nn_relu6_s8_test() -{ - const int size = 1600 + 8 + 7; - int8_t *input, *inout_ansi, *inout_opt; - - input = memalign(16, size); - inout_ansi = memalign(16, size); - inout_opt = memalign(16, size); - - if (input == NULL || inout_ansi == NULL || inout_opt == NULL) { - printf(ANSI_COLOR_RED"%s allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__); - goto relu6_s8_cleanup; - } - /* Generate filter data between -128 -> +127 */ - for (int i = 0; i < size; ++i) { - input[i] = rand() % 255 - 128; - inout_ansi[i] = input[i]; - inout_opt[i] = input[i]; - } - - /* enable profiler */ - profile_c_start(); - - /* C function */ - esp_nn_relu6_s8_ansi(inout_ansi, size); - - profile_c_end(); - profile_opt_start(); - - /* Optimized function */ - esp_nn_relu6_s8(inout_opt, size); - - /* disable profiler */ - profile_opt_end(); - - bool ret = CHECK_EQUAL(inout_ansi, inout_opt, size); - if (ret == false) { - printf(ANSI_COLOR_RED"%s failed\n"ANSI_COLOR_RESET, __FUNCTION__); - printf("Output: \n"); - PRINT_ARRAY_HEX(inout_opt, size, 1); - printf("Expected: \n"); - PRINT_ARRAY_HEX(inout_ansi, size, 1); - printf("Input:\n"); - PRINT_ARRAY_HEX(input, size, 1); - goto relu6_s8_cleanup; - } - printf(ANSI_COLOR_GREEN"%s passed\n"ANSI_COLOR_RESET, __FUNCTION__); - -relu6_s8_cleanup: - if (input) { - free (input); - } - if (inout_ansi) { - free (inout_ansi); - } - if (inout_opt) { - free (inout_opt); - } - -} diff --git a/code/components/esp-nn/tests/src/softmax_test.c b/code/components/esp-nn/tests/src/softmax_test.c deleted file mode 100644 index f7c734cd..00000000 --- a/code/components/esp-nn/tests/src/softmax_test.c +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2022 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - -#include -#include "test_utils.h" - -void esp_nn_softmax_s8_test() -{ - const int32_t height = 8; - const int32_t width = 32; - const int32_t diff_min = -128; - const int32_t mult = INT32_MAX / 2; - const int32_t shift = 7; - void *scratch_buf = NULL; - const int size = width * height; - int8_t *input, *out_ansi, *out_opt; - - input = memalign(16, size); - out_ansi = memalign(16, size); - out_opt = memalign(16, size); - - if (input == NULL || out_ansi == NULL || out_opt == NULL) { - printf(ANSI_COLOR_RED"%s buffer allocations failed\n"ANSI_COLOR_RESET, __FUNCTION__); - goto softmax_s8_cleanup; - } - - /* Generate input data between -128 -> +127 */ - for (int i = 0; i < size; ++i) { - input[i] = rand() % 255 - 128; - } - - /* enable profiler */ - profile_c_start(); - - /* C function */ - esp_nn_softmax_s8_ansi(input, height, width, mult, shift, diff_min, out_ansi); - - profile_c_end(); - - int32_t scratch_buf_size = esp_nn_get_softmax_scratch_size(width, height); - if (scratch_buf_size) { - scratch_buf = memalign(4, scratch_buf_size); - if (scratch_buf == NULL) { - printf(ANSI_COLOR_RED"%s scratch_buf alloc failed size %d\n"ANSI_COLOR_RESET, __FUNCTION__, scratch_buf_size); - goto softmax_s8_cleanup; - } - esp_nn_set_softmax_scratch_buf(scratch_buf); - } - - profile_opt_start(); - - /* Optimized function */ - esp_nn_softmax_s8(input, height, width, mult, shift, diff_min, out_opt); - - /* disable profiler */ - profile_opt_end(); - - bool ret = CHECK_EQUAL(out_ansi, out_opt, size); - if (ret == false) { - printf(ANSI_COLOR_RED"%s failed\n"ANSI_COLOR_RESET, __FUNCTION__); - printf("Output: \n"); - PRINT_ARRAY_HEX(out_opt, width, height); - printf("Expected: \n"); - PRINT_ARRAY_HEX(out_ansi, width, height); - printf("Input:\n"); - PRINT_ARRAY_HEX(input, width, height); - goto softmax_s8_cleanup; - } - printf(ANSI_COLOR_GREEN"%s passed\n"ANSI_COLOR_RESET, __FUNCTION__); - -softmax_s8_cleanup: - if (input) { - free (input); - } - if (out_ansi) { - free (out_ansi); - } - if (out_opt) { - free (out_opt); - } - if (scratch_buf) { - free (scratch_buf); - } -}