From 0e7c600cf7a2effd0119964b6250f5a31ed8d48b Mon Sep 17 00:00:00 2001
From: jomjol <30766535+jomjol@users.noreply.github.com>
Date: Sun, 24 Jul 2022 18:53:25 +0200
Subject: [PATCH] Rolling v10.6.1
---
README.md | 7 +-
code/components/esp-nn/CMakeLists.txt | 4 +-
code/components/esp-nn/Kconfig.projbuild | 8 +-
code/components/esp-nn/README.md | 7 +-
code/components/esp-nn/include/esp_nn.h | 10 +-
.../components/esp-nn/include/esp_nn_ansi_c.h | 1 -
.../esp-nn/include/esp_nn_ansi_headers.h | 136 +-
code/components/esp-nn/include/esp_nn_defs.h | 83 -
.../{esp_nn_generic_opt.h => esp_nn_esp32.h} | 19 +-
.../esp-nn/include/esp_nn_esp32s3.h | 76 +-
.../esp-nn/src/common/common_functions.h | 67 +-
.../esp-nn/src/convolution/esp_nn_conv_ansi.c | 56 +-
.../src/convolution/esp_nn_conv_esp32s3.c | 185 +-
.../esp-nn/src/convolution/esp_nn_conv_opt.c | 179 -
.../convolution/esp_nn_depthwise_conv_ansi.c | 57 +-
.../convolution/esp_nn_depthwise_conv_opt.c | 291 --
.../esp_nn_depthwise_conv_s8_esp32s3.c | 134 +-
.../test_app/sdkconfig.defaults.esp32s3 | 8 -
.../esp-nn/tests/src/basic_math_test.c | 20 +-
.../esp-nn/tests/src/convolution_test.c | 106 +-
code/components/esp-nn_20220724.zip | Bin 0 -> 132850 bytes
.../.github/workflows/build.yml | 29 +-
.../.github/workflows/upload_component.yml | 4 +-
.../esp32-camera-master/CMakeLists.txt | 51 +-
code/components/esp32-camera-master/Kconfig | 58 -
code/components/esp32-camera-master/README.md | 3 -
.../conversions/esp_jpg_decode.c | 8 +-
.../esp32-camera-master/conversions/jpge.cpp | 5 -
.../esp32-camera-master/conversions/to_bmp.c | 20 +-
.../conversions/to_jpg.cpp | 20 +-
.../esp32-camera-master/driver/cam_hal.c | 17 +-
.../esp32-camera-master/driver/esp_camera.c | 41 -
.../driver/include/esp_camera.h | 17 -
.../driver/include/sensor.h | 10 -
.../esp32-camera-master/driver/sccb.c | 5 -
.../esp32-camera-master/driver/sensor.c | 3 -
.../examples/main/take_picture.c | 5 -
.../esp32-camera-master/idf_component.yml | 5 +-
.../esp32-camera-master/sensors/bf20a6.c | 404 ---
.../esp32-camera-master/sensors/gc0308.c | 11 +-
.../sensors/private_include/bf20a6.h | 27 -
.../sensors/private_include/bf20a6_regs.h | 12 -
.../sensors/private_include/bf20a6_settings.h | 158 -
.../sensors/private_include/gc0308_settings.h | 21 +-
.../sensors/private_include/ov5640_settings.h | 3 +-
.../sensors/private_include/sc030iot.h | 31 -
.../private_include/sc030iot_settings.h | 491 ---
.../sensors/private_include/sc101iot.h | 31 -
.../private_include/sc101iot_settings.h | 257 --
.../esp32-camera-master/sensors/sc030iot.c | 335 --
.../esp32-camera-master/sensors/sc101iot.c | 342 --
.../esp32-camera-master/target/esp32/ll_cam.c | 8 +-
.../target/esp32s2/ll_cam.c | 9 +-
.../target/esp32s3/ll_cam.c | 89 +-
.../target/private_include/ll_cam.h | 6 -
.../esp32-camera-master_20220724.zip | Bin 0 -> 308908 bytes
.../jomjol_image_proc/CFindTemplate.h | 2 +-
code/components/tflite-lib/CMakeLists.txt | 5 +-
.../tflite-lib/tensorflow/lite/builtin_ops.h | 2 -
.../tensorflow/lite/c/builtin_op_data.h | 3 -
.../tensorflow/lite/c/c_api_types.h | 8 +-
.../tflite-lib/tensorflow/lite/c/common.cc | 50 +-
.../tflite-lib/tensorflow/lite/c/common.h | 55 +-
.../lite/core/api/flatbuffer_conversions.cc | 11 -
.../tensorflow/lite/core/api/op_resolver.h | 53 +-
.../kernels/internal/reference/hard_swish.h | 6 +-
.../lite/kernels/internal/runtime_shape.h | 9 +-
.../tensorflow/lite/kernels/internal/types.h | 10 +-
.../tensorflow/lite/kernels/kernel_util.h | 2 +-
.../non_persistent_arena_buffer_allocator.cc | 165 -
.../non_persistent_arena_buffer_allocator.h | 104 -
.../persistent_arena_buffer_allocator.cc | 52 -
.../persistent_arena_buffer_allocator.h | 59 -
.../lite/micro/fake_micro_context.cc | 2 +-
.../{arena_allocator => }/ibuffer_allocator.h | 6 +-
.../lite/micro/kernels/activations.cc | 27 +-
.../tensorflow/lite/micro/kernels/add.cc | 9 +-
.../tensorflow/lite/micro/kernels/add_n.cc | 9 +-
.../lite/micro/kernels/arg_min_max.cc | 18 +-
.../lite/micro/kernels/assign_variable.cc | 9 +-
.../lite/micro/kernels/batch_to_space_nd.cc | 9 +-
.../lite/micro/kernels/broadcast_args.cc | 12 +-
.../lite/micro/kernels/broadcast_to.cc | 12 +-
.../lite/micro/kernels/call_once.cc | 9 +-
.../tensorflow/lite/micro/kernels/cast.cc | 9 +-
.../tensorflow/lite/micro/kernels/ceil.cc | 9 +-
.../lite/micro/kernels/circular_buffer.cc | 9 +-
.../lite/micro/kernels/comparisons.cc | 60 +-
.../lite/micro/kernels/concatenation.cc | 16 +-
.../tensorflow/lite/micro/kernels/conv.cc | 62 +-
.../tensorflow/lite/micro/kernels/conv_test.h | 10 -
.../tensorflow/lite/micro/kernels/cumsum.cc | 9 +-
.../lite/micro/kernels/depth_to_space.cc | 9 +-
.../lite/micro/kernels/depthwise_conv.cc | 13 +-
.../lite/micro/kernels/depthwise_conv.h | 28 +-
.../lite/micro/kernels/dequantize.cc | 17 +-
.../lite/micro/kernels/dequantize_common.cc | 5 +-
.../micro/kernels/detection_postprocess.cc | 11 +-
.../lite/micro/kernels/elementwise.cc | 368 +-
.../tensorflow/lite/micro/kernels/elu.cc | 9 +-
.../lite/micro/kernels/esp_nn/add.cc | 9 +-
.../lite/micro/kernels/esp_nn/conv.cc | 67 +-
.../micro/kernels/esp_nn/depthwise_conv.cc | 71 +-
.../micro/kernels/esp_nn/fully_connected.cc | 9 +-
.../lite/micro/kernels/esp_nn/mul.cc | 9 +-
.../lite/micro/kernels/esp_nn/pooling.cc | 18 +-
.../lite/micro/kernels/esp_nn/softmax.cc | 208 --
.../tensorflow/lite/micro/kernels/exp.cc | 9 +-
.../lite/micro/kernels/expand_dims.cc | 9 +-
.../tensorflow/lite/micro/kernels/fill.cc | 9 +-
.../tensorflow/lite/micro/kernels/floor.cc | 9 +-
.../lite/micro/kernels/floor_div.cc | 9 +-
.../lite/micro/kernels/floor_mod.cc | 9 +-
.../lite/micro/kernels/fully_connected.cc | 31 +-
.../lite/micro/kernels/fully_connected.h | 20 +-
.../tensorflow/lite/micro/kernels/gather.cc | 9 +-
.../lite/micro/kernels/gather_nd.cc | 9 +-
.../lite/micro/kernels/hard_swish.cc | 10 +-
.../tensorflow/lite/micro/kernels/if.cc | 9 +-
.../lite/micro/kernels/kernel_runner.cc | 5 +-
.../lite/micro/kernels/kernel_runner.h | 5 +-
.../lite/micro/kernels/kernel_util.cc | 15 -
.../lite/micro/kernels/kernel_util.h | 25 +-
.../lite/micro/kernels/l2_pool_2d.cc | 9 +-
.../tensorflow/lite/micro/kernels/l2norm.cc | 9 +-
.../lite/micro/kernels/leaky_relu.cc | 10 +-
.../lite/micro/kernels/log_softmax.cc | 9 +-
.../tensorflow/lite/micro/kernels/logical.cc | 22 +-
.../tensorflow/lite/micro/kernels/logistic.cc | 9 +-
.../lite/micro/kernels/lstm_eval.cc | 2955 -----------------
.../tensorflow/lite/micro/kernels/lstm_eval.h | 250 --
.../lite/micro/kernels/lstm_shared.h | 67 -
.../lite/micro/kernels/maximum_minimum.cc | 28 +-
.../tensorflow/lite/micro/kernels/micro_ops.h | 6 +-
.../lite/micro/kernels/micro_tensor_utils.cc | 809 -----
.../lite/micro/kernels/micro_tensor_utils.h | 874 -----
.../lite/micro/kernels/mirror_pad.cc | 9 +-
.../tensorflow/lite/micro/kernels/mul.cc | 9 +-
.../tensorflow/lite/micro/kernels/neg.cc | 9 +-
.../tensorflow/lite/micro/kernels/pack.cc | 9 +-
.../tensorflow/lite/micro/kernels/pad.cc | 18 +-
.../tensorflow/lite/micro/kernels/pooling.cc | 18 +-
.../tensorflow/lite/micro/kernels/prelu.cc | 9 +-
.../tensorflow/lite/micro/kernels/quantize.cc | 10 +-
.../lite/micro/kernels/quantize_common.cc | 65 +-
.../lite/micro/kernels/read_variable.cc | 9 +-
.../tensorflow/lite/micro/kernels/reduce.cc | 316 +-
.../tensorflow/lite/micro/kernels/reduce.h | 61 -
.../lite/micro/kernels/reduce_common.cc | 311 --
.../tensorflow/lite/micro/kernels/reshape.cc | 9 +-
.../lite/micro/kernels/resize_bilinear.cc | 9 +-
.../micro/kernels/resize_nearest_neighbor.cc | 10 +-
.../tensorflow/lite/micro/kernels/round.cc | 9 +-
.../tensorflow/lite/micro/kernels/shape.cc | 9 +-
.../tensorflow/lite/micro/kernels/slice.cc | 9 +-
.../tensorflow/lite/micro/kernels/softmax.cc | 9 +-
.../tensorflow/lite/micro/kernels/softmax.h | 28 +-
.../lite/micro/kernels/softmax_common.cc | 86 +-
.../lite/micro/kernels/space_to_batch_nd.cc | 9 +-
.../lite/micro/kernels/space_to_depth.cc | 9 +-
.../tensorflow/lite/micro/kernels/split.cc | 9 +-
.../tensorflow/lite/micro/kernels/split_v.cc | 9 +-
.../lite/micro/kernels/squared_difference.cc | 247 --
.../tensorflow/lite/micro/kernels/squeeze.cc | 9 +-
.../lite/micro/kernels/strided_slice.cc | 10 +-
.../tensorflow/lite/micro/kernels/sub.cc | 9 +-
.../tensorflow/lite/micro/kernels/svdf.cc | 9 +-
.../tensorflow/lite/micro/kernels/tanh.cc | 10 +-
.../lite/micro/kernels/transpose.cc | 9 +-
.../lite/micro/kernels/transpose_conv.cc | 17 +-
.../kernels/unidirectional_sequence_lstm.cc | 1696 ----------
...unidirectional_sequence_lstm_test_config.h | 244 --
.../tensorflow/lite/micro/kernels/unpack.cc | 9 +-
.../lite/micro/kernels/var_handle.cc | 9 +-
.../tensorflow/lite/micro/kernels/while.cc | 9 +-
.../lite/micro/kernels/zeros_like.cc | 9 +-
.../lite/micro/micro_allocation_info.cc | 40 +-
.../lite/micro/micro_allocation_info.h | 4 -
.../tensorflow/lite/micro/micro_allocator.cc | 2 +-
.../tensorflow/lite/micro/micro_allocator.h | 2 +-
.../tensorflow/lite/micro/micro_context.cc | 10 -
.../tensorflow/lite/micro/micro_context.h | 7 -
.../lite/micro/micro_mutable_op_resolver.h | 29 +-
.../tensorflow/lite/micro/micro_profiler.cc | 12 +-
.../tensorflow/lite/micro/micro_profiler.h | 6 +-
.../tensorflow/lite/micro/micro_time.cc | 8 +-
.../tensorflow/lite/micro/micro_time.h | 10 +-
.../lite/micro/recording_micro_allocator.cc | 2 +-
.../lite/micro/recording_micro_allocator.h | 2 +-
.../recording_simple_memory_allocator.cc | 2 +-
.../recording_simple_memory_allocator.h | 8 +-
.../simple_memory_allocator.cc | 2 +-
.../simple_memory_allocator.h | 8 +-
.../tensorflow/lite/micro/test_helpers.cc | 18 +-
.../tensorflow/lite/schema/schema_generated.h | 152 +-
code/components/tflite-lib_20220417.zip | Bin 740090 -> 0 bytes
code/components/tflite-lib_20220724.zip | Bin 0 -> 861331 bytes
code/main/version.cpp | 6 +-
code/main/version.h | 2 +-
code/platformio.ini | 2 +-
code/sdkconfig.esp32cam | 3 -
code/version.cpp | 6 +-
firmware/bootloader.bin | Bin 26864 -> 26864 bytes
firmware/dhy0540s3DropOut-10erq.tflite | Bin 0 -> 315504 bytes
firmware/firmware.bin | Bin 1788992 -> 1777776 bytes
firmware/html.zip | Bin 179819 -> 179824 bytes
sd-card/html/edit_config_param.html | 4 +-
sd-card/html/edit_reference.html | 4 +-
sd-card/html/version.txt | 2 +-
209 files changed, 1791 insertions(+), 12917 deletions(-)
delete mode 100644 code/components/esp-nn/include/esp_nn_defs.h
rename code/components/esp-nn/include/{esp_nn_generic_opt.h => esp_nn_esp32.h} (77%)
delete mode 100644 code/components/esp-nn/src/convolution/esp_nn_conv_opt.c
delete mode 100644 code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_opt.c
delete mode 100644 code/components/esp-nn/test_app/sdkconfig.defaults.esp32s3
create mode 100644 code/components/esp-nn_20220724.zip
delete mode 100644 code/components/esp32-camera-master/sensors/bf20a6.c
delete mode 100644 code/components/esp32-camera-master/sensors/private_include/bf20a6.h
delete mode 100644 code/components/esp32-camera-master/sensors/private_include/bf20a6_regs.h
delete mode 100644 code/components/esp32-camera-master/sensors/private_include/bf20a6_settings.h
delete mode 100644 code/components/esp32-camera-master/sensors/private_include/sc030iot.h
delete mode 100644 code/components/esp32-camera-master/sensors/private_include/sc030iot_settings.h
delete mode 100644 code/components/esp32-camera-master/sensors/private_include/sc101iot.h
delete mode 100644 code/components/esp32-camera-master/sensors/private_include/sc101iot_settings.h
delete mode 100644 code/components/esp32-camera-master/sensors/sc030iot.c
delete mode 100644 code/components/esp32-camera-master/sensors/sc101iot.c
create mode 100644 code/components/esp32-camera-master_20220724.zip
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.cc
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.cc
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h
rename code/components/tflite-lib/tensorflow/lite/micro/{arena_allocator => }/ibuffer_allocator.h (95%)
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/esp_nn/softmax.cc
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_eval.cc
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_eval.h
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/lstm_shared.h
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/micro_tensor_utils.cc
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/micro_tensor_utils.h
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/reduce.h
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/reduce_common.cc
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/squared_difference.cc
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc
delete mode 100644 code/components/tflite-lib/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm_test_config.h
rename code/components/tflite-lib/tensorflow/lite/micro/{arena_allocator => }/recording_simple_memory_allocator.cc (97%)
rename code/components/tflite-lib/tensorflow/lite/micro/{arena_allocator => }/recording_simple_memory_allocator.h (87%)
rename code/components/tflite-lib/tensorflow/lite/micro/{arena_allocator => }/simple_memory_allocator.cc (99%)
rename code/components/tflite-lib/tensorflow/lite/micro/{arena_allocator => }/simple_memory_allocator.h (95%)
delete mode 100644 code/components/tflite-lib_20220417.zip
create mode 100644 code/components/tflite-lib_20220724.zip
create mode 100644 firmware/dhy0540s3DropOut-10erq.tflite
diff --git a/README.md b/README.md
index 61343250..678b320d 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,12 @@ In other cases you can contact the developer via email:
+
/************************** Basic math functions ****************************/
/**
@@ -80,15 +81,28 @@ void esp_nn_mul_elementwise_s8_ansi(const int8_t *input1_data,
* optimization notes: Though input_offset is int32 type,
* offset values are contained in 8 bits [-128, 127]
*/
-void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
+void esp_nn_depthwise_conv_s8_ansi(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
+ const uint16_t ch_mult,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
int8_t *out_data,
- const dw_conv_params_t *conv_params,
- const quant_data_t *quant_data);
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max);
/**
* @brief 2d-convolution channelwise
@@ -98,26 +112,43 @@ void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims,
* inputs type: int8_t, output: int8_t
* input offsets: although int32_t, they are contained in 8 bits [-128, 127]
*/
-void esp_nn_conv_s8_ansi(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
+void esp_nn_conv_s8_ansi(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
int8_t *out_data,
- const conv_params_t *conv_params,
- const quant_data_t *quant_data);
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const uint16_t out_channels,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max);
-int esp_nn_get_conv_scratch_size_ansi(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const conv_params_t *conv_params);
+int esp_nn_get_conv_scratch_size_ansi(const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_ch,
+ const uint16_t out_ch,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht);
void esp_nn_set_conv_scratch_buf_ansi(const void *buf);
-int esp_nn_get_depthwise_conv_scratch_size_ansi(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const dw_conv_params_t *conv_params);
+int esp_nn_get_depthwise_conv_scratch_size_ansi(const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const uint16_t ch_mult,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht);
void esp_nn_set_depthwise_conv_scratch_buf_ansi(const void *buf);
/************************** Activation functions *****************************/
@@ -221,6 +252,9 @@ int32_t esp_nn_get_softmax_scratch_size_opt(const int32_t width, const int32_t h
*/
void esp_nn_set_softmax_scratch_buf_ansi(void *buffer);
+/* ANSI C function to be hooked up when optimised version needed */
+void esp_nn_set_softmax_scratch_buf_opt(void *buffer);
+
/**
* @brief reference softmax function
*
@@ -234,66 +268,6 @@ void esp_nn_softmax_s8_ansi(const int8_t *input_data,
const int32_t diff_min,
int8_t *output_data);
-
-//////////////////////////// Generic optimisations /////////////////////////////
-
-/************************** Convolution functions *****************************/
-
-/**
- * @brief 2d-convolution channelwise optimized version
- *
- * @note operation: result += (input + offset) * filter
- *
- * inputs type: int8_t, output: int8_t
- * input offsets: although int32_t, they are contained in 8 bits [-128, 127]
- */
-void esp_nn_conv_s8_opt(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
- const int8_t *filter_data,
- const int32_t *bias,
- const data_dims_t *output_dims,
- int8_t *out_data,
- const conv_params_t *conv_params,
- const quant_data_t *quant_data);
-
-/**
- * @brief depthwise convolution per channel optimized version
- *
- * @note inputs type: int8_t, output: int8_t
- * Version used in tflite is per channel.
- * This version follows the same footsprints.
- * Meaning, it has per out_channel shift and multiplier for
- * requantization
- *
- * optimization notes: Though input_offset is int32 type,
- * offset values are contained in 8 bits [-128, 127]
- */
-void esp_nn_depthwise_conv_s8_opt(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
- const int8_t *filter_data,
- const int32_t *bias,
- const data_dims_t *output_dims,
- int8_t *out_data,
- const dw_conv_params_t *conv_params,
- const quant_data_t *quant_data);
-
-int esp_nn_get_conv_scratch_size_opt(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const conv_params_t *conv_params);
-void esp_nn_set_conv_scratch_buf_opt(const void *buf);
-
-int esp_nn_get_depthwise_conv_scratch_size_opt(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const dw_conv_params_t *conv_params);
-void esp_nn_set_depthwise_conv_scratch_buf_opt(const void *buf);
-
-/* ANSI C function to be hooked up when optimised version needed */
-void esp_nn_set_softmax_scratch_buf_opt(void *buffer);
-
/**
* @brief optimised version of softmax function
*
diff --git a/code/components/esp-nn/include/esp_nn_defs.h b/code/components/esp-nn/include/esp_nn_defs.h
deleted file mode 100644
index 756d8e6f..00000000
--- a/code/components/esp-nn/include/esp_nn_defs.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright 2022 Espressif Systems (Shanghai) PTE LTD
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include
-
-/**
- * @brief structure to club data dims
- * this structure can be used for input, output and filter
- */
-typedef struct data_dims {
- int32_t width;
- int32_t height;
- int32_t channels;
-
- int32_t extra; // can be used as batch or any other param
-} data_dims_t;
-
-/**
- * @brief 2d data structure (width, height)
- *
- */
-typedef struct data_2d {
- int32_t width;
- int32_t height;
-} data_2d_t;
-
-/**
- * @brief min/max activation
- */
-typedef struct act_params {
- int32_t min;
- int32_t max;
-} act_params_t;
-
-/**
- * @brief per channel quant data
- *
- * @note number of shift and mult elements are equal to output channels
- */
-typedef struct quant_data {
- int32_t *shift;
- int32_t *mult;
-} quant_data_t;
-
-/**
- * @brief params specific to convolution 2d
- *
- */
-typedef struct conv_params {
- int32_t in_offset;
- int32_t out_offset;
- data_2d_t stride;
- data_2d_t padding;
- data_2d_t dilation;
- act_params_t activation;
-} conv_params_t;
-
-/**
- * @brief params specific to depthwise convolution 2d
- *
- */
-typedef struct dw_conv_params {
- int32_t in_offset;
- int32_t out_offset;
- int32_t ch_mult; // channel multiplier. (in_ch * ch_mult = out_ch)
- data_2d_t stride;
- data_2d_t padding;
- data_2d_t dilation;
- act_params_t activation;
-} dw_conv_params_t;
diff --git a/code/components/esp-nn/include/esp_nn_generic_opt.h b/code/components/esp-nn/include/esp_nn_esp32.h
similarity index 77%
rename from code/components/esp-nn/include/esp_nn_generic_opt.h
rename to code/components/esp-nn/include/esp_nn_esp32.h
index 136cba5d..03fd8216 100644
--- a/code/components/esp-nn/include/esp_nn_generic_opt.h
+++ b/code/components/esp-nn/include/esp_nn_esp32.h
@@ -13,27 +13,28 @@
// limitations under the License.
/**
- * @file Header definitions to include for esp_nn generic optimisations
- * For functions which not having optimisations, _ansi versions are picked.
+ * @file Header definitions to include for esp_nn optimized functions for
+ * the ESP32 platform.
+ * We are hooking up just the C versions for now.
+ * The file hence is exactly same as `esp_nn_ansi_c.h`
*/
#pragma once
-#include "esp_nn_defs.h"
#include "esp_nn_ansi_headers.h"
#define esp_nn_add_elementwise_s8 esp_nn_add_elementwise_s8_ansi
#define esp_nn_mul_elementwise_s8 esp_nn_mul_elementwise_s8_ansi
-#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_opt
+#define esp_nn_depthwise_conv_s8 esp_nn_depthwise_conv_s8_ansi
-#define esp_nn_conv_s8 esp_nn_conv_s8_opt
+#define esp_nn_conv_s8 esp_nn_conv_s8_ansi
-#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_opt
-#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_opt
+#define esp_nn_get_conv_scratch_size esp_nn_get_conv_scratch_size_ansi
+#define esp_nn_set_conv_scratch_buf esp_nn_set_conv_scratch_buf_ansi
-#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_opt
-#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_opt
+#define esp_nn_get_depthwise_conv_scratch_size esp_nn_get_depthwise_conv_scratch_size_ansi
+#define esp_nn_set_depthwise_conv_scratch_buf esp_nn_set_depthwise_conv_scratch_buf_ansi
#define esp_nn_relu6_s8 esp_nn_relu6_s8_ansi
diff --git a/code/components/esp-nn/include/esp_nn_esp32s3.h b/code/components/esp-nn/include/esp_nn_esp32s3.h
index 0f52c943..58b544e4 100644
--- a/code/components/esp-nn/include/esp_nn_esp32s3.h
+++ b/code/components/esp-nn/include/esp_nn_esp32s3.h
@@ -19,7 +19,7 @@
#pragma once
-#include "esp_nn_defs.h"
+#include
#include "esp_nn_ansi_headers.h"
/************************** Basic math functions *****************************/
@@ -85,15 +85,28 @@ void esp_nn_mul_elementwise_s8_esp32s3(const int8_t *input1_data,
* optimization notes: Though input_offset is int32 type,
* offset values are contained in 8 bits [-128, 127]
*/
-void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
+void esp_nn_depthwise_conv_s8_esp32s3(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
+ const uint16_t ch_mult,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
- int8_t *output_data,
- const dw_conv_params_t *conv_params,
- const quant_data_t *quant_data);
+ int8_t *out_data,
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max);
/**
* @brief 2d - convolution channelwise
@@ -103,26 +116,43 @@ void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims,
* inputs type: int8_t, output: int8_t
* input offsets: although int32_t, they are contained in 8 bits [-128, 127]
*/
-void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
+void esp_nn_conv_s8_esp32s3(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
- int8_t *output_data,
- const conv_params_t *conv_params,
- const quant_data_t *quant_data);
+ int8_t *out_data,
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const uint16_t out_channels,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max);
-int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const conv_params_t *conv_params);
+int esp_nn_get_conv_scratch_size_esp32s3(const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_ch,
+ const uint16_t out_ch,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht);
void esp_nn_set_conv_scratch_buf_esp32s3(const void *buf);
-int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const dw_conv_params_t *conv_params);
+int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const uint16_t ch_mult,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht);
void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(const void *buf);
/************************** Pooling functions *****************************/
diff --git a/code/components/esp-nn/src/common/common_functions.h b/code/components/esp-nn/src/common/common_functions.h
index 0a74eca4..9a5f0dcc 100644
--- a/code/components/esp-nn/src/common/common_functions.h
+++ b/code/components/esp-nn/src/common/common_functions.h
@@ -41,52 +41,8 @@
__NN_FORCE_INLINE__ int32_t esp_nn_clz32(uint32_t in)
{
-#if CONFIG_IDF_TARGET_ARCH_XTENSA
__asm__ volatile("nsau %0, %0" : "+r" (in));
return in;
-#elif defined(__GNUC__)
- return __builtin_clz(in);
-#else
- int32_t count = 32;
- uint32_t x = in, y = in >> 16;
- if (y != 0) {
- count -= 16;
- x = y;
- }
- y = x >> 8;
- if (y != 0) {
- count -= 8;
- x = y;
- }
- y = x >> 4;
- if (y != 0) {
- count -= 4;
- x = y;
- }
- y = x >> 2;
- if (y != 0) {
- count -= 2;
- x = y;
- }
- y = x >> 1;
- if (y != 0) {
- return count - 2;
- }
- return count - x;
-#endif
-}
-
-/**
- * Signed saturate a 32 bit value to 8 bits keeping output in 32 bit variable.
- */
-__NN_FORCE_INLINE__ int32_t esp_nn_saturate8(int32_t in)
-{
-#if CONFIG_IDF_TARGET_ARCH_XTENSA
- __asm__ volatile("clamps %0, %0, 7" : "+a"(in));
- return in;
-#else
- return max(INT8_MIN, min(in, INT8_MAX));
-#endif
}
__NN_FORCE_INLINE__ int32_t esp_nn_pick_sat_high32_of64(int64_t val64)
@@ -96,6 +52,15 @@ __NN_FORCE_INLINE__ int32_t esp_nn_pick_sat_high32_of64(int64_t val64)
return (int32_t) ((int64_t) (val64 + to_add) >> 31);
}
+/**
+ * Signed saturate a 32 bit value to 8 bits keeping output in 32 bit variable.
+ */
+__NN_FORCE_INLINE__ int32_t esp_nn_saturate8(int32_t in)
+{
+ __asm__ volatile("clamps %0, %0, 7" : "+a"(in));
+ return in;
+}
+
__NN_FORCE_INLINE__ int32_t esp_nn_sat_round_doubling_high_mul(int32_t in0, int32_t in1)
{
int32_t result;
@@ -179,7 +144,7 @@ static void esp_nn_aligned_s8_pad_with_value(const int8_t *src, int8_t *dst,
const uint16_t pad_ht)
{
/* memset with pad_val */
- memset(dst, pad_val, ((input_wd + 2 * pad_wd) * (input_ht + 2 * pad_ht)) * channels);
+ memset(dst, pad_val, ((input_wd + 2 * pad_wd) * (input_ht + 2 * pad_ht)) * channels * 2);
dst += (pad_wd + input_wd + pad_wd) * channels;
for (int i = 0; i < input_ht; i++) {
@@ -191,6 +156,7 @@ static void esp_nn_aligned_s8_pad_with_value(const int8_t *src, int8_t *dst,
}
}
+#if 0
static void esp_nn_aligned_s8_pad_end_with_value(const int8_t *src, int8_t *dst,
const uint16_t input_wd,
const uint16_t input_ht,
@@ -203,16 +169,13 @@ static void esp_nn_aligned_s8_pad_end_with_value(const int8_t *src, int8_t *dst,
for (int j = 0; j < input_wd * channels; j++) {
*dst++ = *src++;
}
- if (pad_wd) {
- memset(dst, pad_val, pad_wd * channels);
- dst += pad_wd * channels;
- }
+ memset(dst, pad_val, pad_wd * channels);
+ dst += pad_wd * channels;
}
/* pad end `pad_ht` lines at end */
- if (pad_ht) {
- memset(dst, pad_val, (input_wd + pad_wd) * pad_ht * channels);
- }
+ memset(dst, pad_val, (input_wd + pad_wd) * pad_ht * channels);
}
+#endif
/**
* @brief convert 8 bit input data to 16 bit
diff --git a/code/components/esp-nn/src/convolution/esp_nn_conv_ansi.c b/code/components/esp-nn/src/convolution/esp_nn_conv_ansi.c
index 677c0ad8..d04f78e1 100644
--- a/code/components/esp-nn/src/convolution/esp_nn_conv_ansi.c
+++ b/code/components/esp-nn/src/convolution/esp_nn_conv_ansi.c
@@ -12,14 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include
+#include
#include
-int esp_nn_get_conv_scratch_size_ansi(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const conv_params_t *conv_params)
+int esp_nn_get_conv_scratch_size_ansi(const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_ch,
+ const uint16_t out_ch,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht)
{
return 0;
}
@@ -106,35 +108,29 @@ void esp_nn_conv_u8_ansi(const uint8_t *input_data,
* Assumption 2: Pointers are valid
* Assumption 3: dialation width = 1
*/
-void esp_nn_conv_s8_ansi(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
+void esp_nn_conv_s8_ansi(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
int8_t *out_data,
- const conv_params_t *conv_params,
- const quant_data_t *quant_data)
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const uint16_t out_channels,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max)
{
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t in_channels = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const uint16_t out_channels = output_dims->channels;
- const int32_t *out_shift = quant_data->shift;
- const int32_t *out_mult = quant_data->mult;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
-
int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx;
for (out_y = 0; out_y < out_ht; out_y++) {
diff --git a/code/components/esp-nn/src/convolution/esp_nn_conv_esp32s3.c b/code/components/esp-nn/src/convolution/esp_nn_conv_esp32s3.c
index e13129b2..ea8fdfa5 100644
--- a/code/components/esp-nn/src/convolution/esp_nn_conv_esp32s3.c
+++ b/code/components/esp-nn/src/convolution/esp_nn_conv_esp32s3.c
@@ -12,30 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include
#include
-#include
#include
static int16_t *scratch_buffer = NULL;
-extern void esp_nn_conv_s8_mult8_1x1_esp32s3(const int8_t *input_data,
- const uint16_t input_wd,
- const uint16_t input_ht,
- const uint16_t in_channels,
- const int32_t input_offset,
- const int8_t *filter_aligned,
- const int32_t *bias,
- int8_t *out_data,
- const uint16_t out_wd,
- const uint16_t out_ht,
- const uint16_t out_channels,
- const int32_t out_offset,
- const int32_t *out_shift,
- const int32_t *out_mult,
- const int32_t activation_min,
- const int32_t activation_max,
- void *buffer /* scratch buffer */);
+extern void esp_nn_conv_s16_mult8_1x1_esp32s3(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_channels,
+ const int32_t input_offset,
+ const int16_t *filter_data,
+ const int32_t *bias,
+ int8_t *out_data,
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const uint16_t out_channels,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max,
+ void *buffer /* scratch buffer */);
extern void esp_nn_conv_s16_mult4_1x1_esp32s3(const int16_t *input_data,
const uint16_t input_wd,
@@ -81,40 +81,34 @@ extern void esp_nn_aligned_s8_to_s16_with_offset_esp32s3(const int8_t *src, int1
extern void esp_nn_s8_to_s16_esp32s3(const int8_t *src, int16_t *dst, const int size);
-static void esp_nn_conv_s8_unrolled(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
+static void esp_nn_conv_s8_unrolled(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
int8_t *out_data,
- const conv_params_t *conv_params,
- const quant_data_t *quant_data)
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const uint16_t out_channels,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max)
{
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t in_ch = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const uint16_t out_ch = output_dims->channels;
- const int32_t *out_shift = quant_data->shift;
- const int32_t *out_mult = quant_data->mult;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
-
int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx;
for (out_y = 0; out_y < out_ht; out_y++) {
for (out_x = 0; out_x < out_wd; out_x++) {
- for (out_ch_idx = 0; out_ch_idx < out_ch; out_ch_idx++) {
+ for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
int32_t conv_out = 0;
const int32_t base_y = stride_ht * out_y - pad_ht;
@@ -130,10 +124,10 @@ static void esp_nn_conv_s8_unrolled(const data_dims_t *input_dims,
for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
const int32_t in_row = base_y + filter_y_idx;
const int32_t in_col = base_x + filter_x_idx;
- int32_t input_base_offset = (in_row * input_wd + in_col) * in_ch;
- int32_t filter_base_offset = out_ch_idx * in_ch * filter_ht * filter_wd +
- (filter_y_idx * filter_wd + filter_x_idx) * in_ch;
- for (in_ch_idx = 0; in_ch_idx < in_ch; in_ch_idx++) {
+ int32_t input_base_offset = (in_row * input_wd + in_col) * in_channels;
+ int32_t filter_base_offset = out_ch_idx * in_channels * filter_ht * filter_wd +
+ (filter_y_idx * filter_wd + filter_x_idx) * in_channels;
+ for (in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) {
conv_out +=
(input_data[input_base_offset + in_ch_idx] + input_offset) *
filter_data[filter_base_offset + in_ch_idx];
@@ -338,35 +332,18 @@ static void esp_nn_conv_s8_pad_valid_ch3_3x3(const int8_t *input_data,
}
}
-int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const conv_params_t *conv_params)
+int esp_nn_get_conv_scratch_size_esp32s3(const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t in_ch,
+ const uint16_t out_ch,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht)
{
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t in_ch = input_dims->channels;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t out_ch = output_dims->channels;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
-
int filter_size = filter_wd * filter_ht * in_ch * out_ch;
int input_size = input_wd * input_ht * in_ch;
-
- int transpose_buf_size = 2 * (8 * in_ch); /* to store intermediate data */
- if (input_wd * input_ht < 8) {
- transpose_buf_size = 0; // not using this for leftover
- }
+ int transpose_buf_size = 8 * in_ch; /* to store intermediate data */
int align_buf_size = 32; /* extra buffer for alignment */
- if (in_ch % 8 == 0 && filter_wd == 1 && filter_ht == 1 &&
- pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) {
- return filter_size + transpose_buf_size + align_buf_size;
- }
- return 2 * (filter_size + input_size) + transpose_buf_size + align_buf_size;
+ return 2 * (filter_size + input_size + transpose_buf_size) + align_buf_size;
}
void esp_nn_set_conv_scratch_buf_esp32s3(void *buf)
@@ -374,35 +351,29 @@ void esp_nn_set_conv_scratch_buf_esp32s3(void *buf)
scratch_buffer = (int16_t *) buf;
}
-void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims,
- const int8_t *input,
- const data_dims_t *filter_dims,
+void esp_nn_conv_s8_esp32s3(const int8_t *input,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
int8_t *out_data,
- const conv_params_t *conv_params,
- const quant_data_t *quant_data)
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const uint16_t out_channels,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max)
{
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t channels = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const uint16_t out_channels = output_dims->channels;
- const int32_t *out_shift = quant_data->shift;
- const int32_t *out_mult = quant_data->mult;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
-
int filter_size = filter_wd * filter_ht * channels * out_channels;
int input_size = input_wd * input_ht * channels;
int align_len = 16 - (filter_size & 15);
@@ -416,16 +387,15 @@ void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims,
if (channels % 8 == 0 && filter_wd == 1 && filter_ht == 1 &&
pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) {
- int8_t *filter_aligned = (int8_t *) scratch_buffer;
- int scratch_offset = (int) (filter_aligned + filter_size);
+ int scratch_offset = (int) (filter_data16 + filter_size);
void *scratch_buf = (void *) (scratch_offset + 16 - (scratch_offset & 15));
- memcpy(filter_aligned, filter_data, filter_size); // copy to aligned address
- esp_nn_conv_s8_mult8_1x1_esp32s3(
- input, input_wd, input_ht, channels, input_offset, filter_aligned,
+ esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
+ esp_nn_conv_s16_mult8_1x1_esp32s3(
+ input, input_wd, input_ht, channels, input_offset, filter_data16,
bias, out_data, out_wd, out_ht, out_channels, out_offset,
out_shift, out_mult, activation_min, activation_max, scratch_buf);
} else if (channels % 4 == 0 && filter_wd == 1 && filter_ht == 1 &&
- (input_wd * input_ht) % 4 == 0 && /* TODO: remove this check */
+ (input_wd * input_ht) % 16 == 0 && /* TODO: remove this check */
pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) {
int scratch_offset = (int) (input_data16 + input_size);
void *scratch_buf = (void *) (scratch_offset + 16 - (scratch_offset & 15));
@@ -457,7 +427,10 @@ void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims,
}
} else {
/* Basic unrolled version */
- esp_nn_conv_s8_unrolled(input_dims, input, filter_dims, filter_data,
- bias, output_dims, out_data, conv_params, quant_data);
+ esp_nn_conv_s8_unrolled(input, input_wd, input_ht, channels, input_offset,
+ pad_wd, pad_ht, stride_wd, stride_ht,
+ filter_data, filter_wd, filter_ht, bias,
+ out_data, out_wd, out_ht, out_channels, out_offset, out_shift,
+ out_mult, activation_min, activation_max);
}
}
diff --git a/code/components/esp-nn/src/convolution/esp_nn_conv_opt.c b/code/components/esp-nn/src/convolution/esp_nn_conv_opt.c
deleted file mode 100644
index be96430e..00000000
--- a/code/components/esp-nn/src/convolution/esp_nn_conv_opt.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include
-
-#include
-
-int esp_nn_get_conv_scratch_size_opt(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const conv_params_t *conv_params)
-{
- return 0;
-}
-
-void esp_nn_set_conv_scratch_buf_opt(const void *buf)
-{
-
-}
-
-__attribute__ ((noinline))
-static void esp_nn_conv_s8_1x1(const data_dims_t *input_dims,
- const int8_t *input_data,
- const int8_t *filter_data,
- const int32_t *bias,
- const data_dims_t *output_dims,
- int8_t *out_data,
- const conv_params_t *conv_params,
- const quant_data_t *quant_data)
-{
- const uint16_t input_wd = input_dims->width;
- const uint16_t in_channels = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const uint16_t out_channels = output_dims->channels;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
-
- for (int32_t in_row = 0; in_row < out_ht * stride_ht; in_row += stride_ht) {
- for (int32_t in_col = 0; in_col < out_wd * stride_wd; in_col += stride_wd) {
- const int32_t *out_mult = quant_data->mult;
- const int32_t *out_shift = quant_data->shift;
- const int8_t *filter_ptr = filter_data;
- const int8_t *input_base_ptr = input_data + (in_row * input_wd + in_col) * in_channels;
- int32_t out_ch_idx = 0;
- for (; out_ch_idx < out_channels; out_ch_idx++) {
- int32_t conv_out = 0;
-
- const int8_t *input_ptr = input_base_ptr;
-
- int32_t in_ch_idx = 0;
- for (; in_ch_idx < in_channels - 3; in_ch_idx += 4) {
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- }
- for (; in_ch_idx < in_channels; in_ch_idx ++) {
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- }
- if (bias) {
- conv_out += bias[out_ch_idx];
- }
- conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, *out_mult++, *out_shift++);
- conv_out += out_offset;
- conv_out = max(conv_out, activation_min);
- conv_out = min(conv_out, activation_max);
- *out_data++ = (int8_t) conv_out;
- }
- }
- }
-}
-
-/**
- * Assumption 1: i/p channels == o/p channels
- * Assumption 2: Pointers are valid
- * Assumption 3: dialation width = 1
- */
-void esp_nn_conv_s8_opt(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
- const int8_t *filter_data,
- const int32_t *bias,
- const data_dims_t *output_dims,
- int8_t *out_data,
- const conv_params_t *conv_params,
- const quant_data_t *quant_data)
-{
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
-
- if (filter_wd == 1 && filter_ht == 1) {
- esp_nn_conv_s8_1x1(input_dims, input_data, filter_data, bias,
- output_dims, out_data, conv_params, quant_data);
- return;
- }
-
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t in_channels = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const uint16_t out_channels = output_dims->channels;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
-
- int32_t out_ch_idx, out_y, out_x, filter_y_idx, filter_x_idx;
-
- for (out_y = 0; out_y < out_ht; out_y++) {
- for (out_x = 0; out_x < out_wd; out_x++) {
- const int32_t *out_shift = quant_data->shift;
- const int32_t *out_mult = quant_data->mult;
- for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
- int32_t conv_out = 0;
-
- const int32_t base_y = stride_ht * out_y - pad_ht;
- const int32_t base_x = stride_wd * out_x - pad_wd;
-
- const int32_t filter_y_start = max(0, -base_y);
- const int32_t filter_x_start = max(0, -base_x);
-
- const int32_t filter_y_end = min(filter_ht, input_ht - base_y);
- const int32_t filter_x_end = min(filter_wd, input_wd - base_x);
-
- for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
- for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
- const int32_t in_row = base_y + filter_y_idx;
- const int32_t in_col = base_x + filter_x_idx;
-
- const int8_t *input_ptr = input_data +
- (in_row * input_wd + in_col) * in_channels;
- const int8_t *filter_ptr = filter_data +
- out_ch_idx * in_channels * filter_ht * filter_wd +
- (filter_y_idx * filter_wd + filter_x_idx) * in_channels;
- int32_t in_ch_idx = 0;
- for (; in_ch_idx < in_channels - 3; in_ch_idx += 4) {
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- }
- for (; in_ch_idx < in_channels; in_ch_idx ++) {
- conv_out += (*input_ptr++ + input_offset) * *filter_ptr++;
- }
- }
- }
- if (bias) {
- conv_out += bias[out_ch_idx];
- }
- conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, *out_mult++, *out_shift++);
- conv_out += out_offset;
- conv_out = max(conv_out, activation_min);
- conv_out = min(conv_out, activation_max);
- *out_data++ = (int8_t) conv_out;
- }
- }
- }
-}
diff --git a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_ansi.c b/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_ansi.c
index 1cd02e0f..9cac6cef 100644
--- a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_ansi.c
+++ b/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_ansi.c
@@ -12,13 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include
+#include
+
#include
-int esp_nn_get_depthwise_conv_scratch_size_ansi(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const dw_conv_params_t *conv_params)
+int esp_nn_get_depthwise_conv_scratch_size_ansi(const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const uint16_t ch_mult,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht)
{
return 0;
}
@@ -28,35 +31,29 @@ void esp_nn_set_depthwise_conv_scratch_buf_ansi(const void *buf)
}
-void esp_nn_depthwise_conv_s8_ansi(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
+void esp_nn_depthwise_conv_s8_ansi(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
+ const uint16_t ch_mult,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
int8_t *out_data,
- const dw_conv_params_t *conv_params,
- const quant_data_t *quant_data)
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max)
{
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t channels = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const int32_t *out_shift = quant_data->shift;
- const int32_t *out_mult = quant_data->mult;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
- const uint16_t ch_mult = conv_params->ch_mult;
-
int out_idx = 0;
for (int out_y = 0; out_y < out_ht; out_y++) { //height loop
const int16_t base_y = (out_y * stride_ht) - pad_ht;
diff --git a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_opt.c b/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_opt.c
deleted file mode 100644
index 4afea3f3..00000000
--- a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_opt.c
+++ /dev/null
@@ -1,291 +0,0 @@
-// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include
-#include
-
-int esp_nn_get_depthwise_conv_scratch_size_opt(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const dw_conv_params_t *conv_params)
-{
- return 0;
-}
-
-void esp_nn_set_depthwise_conv_scratch_buf_opt(const void *buf)
-{
-
-}
-
-/* common channel multiplier == 1 case */
-__attribute__ ((noinline))
-static void esp_nn_depthwise_conv_s8_ch_mult_1(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
- const int8_t *filter_data,
- const int32_t *bias,
- const data_dims_t *output_dims,
- int8_t *out_data,
- const dw_conv_params_t *conv_params,
- const quant_data_t *quant_data)
-{
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t channels = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
-
- int out_idx = 0;
- for (int out_y = 0; out_y < out_ht; out_y++) { //height loop
- const int16_t base_y = (out_y * stride_ht) - pad_ht;
- for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop
- const int16_t base_x = (out_x * stride_wd) - pad_wd;
-
- const int32_t *out_shift = quant_data->shift;
- const int32_t *out_mult = quant_data->mult;
-
- /* Select filter so as the point doesn't lie outside block */
- int filter_y_start = max(0, -base_y);
- int filter_x_start = max(0, -base_x);
- int filter_y_end = min(filter_ht, input_ht - base_y);
- int filter_x_end = min(filter_wd, input_wd - base_x);
-
- int ch_idx = 0;
- for (; ch_idx < channels - 3; ch_idx += 4) {//channel_loop
- int32_t result0 = 0;
- int32_t result1 = 0;
- int32_t result2 = 0;
- int32_t result3 = 0;
-
- for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
- const int32_t idx_y = base_y + filter_y_idx;
- for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
- const int32_t idx_x = base_x + filter_x_idx;
- int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx;
- int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels) + ch_idx;
- int32_t input_val0 = input_data[input_index + 0] + input_offset;
- int32_t input_val1 = input_data[input_index + 1] + input_offset;
- int32_t input_val2 = input_data[input_index + 2] + input_offset;
- int32_t input_val3 = input_data[input_index + 3] + input_offset;
- int32_t filter_val0 = filter_data[filter_index + 0];
- int32_t filter_val1 = filter_data[filter_index + 1];
- int32_t filter_val2 = filter_data[filter_index + 2];
- int32_t filter_val3 = filter_data[filter_index + 3];
- result0 += input_val0 * filter_val0;
- result1 += input_val1 * filter_val1;
- result2 += input_val2 * filter_val2;
- result3 += input_val3 * filter_val3;
- }
- }
- if (bias) {
- result0 += bias[ch_idx + 0];
- result1 += bias[ch_idx + 1];
- result2 += bias[ch_idx + 2];
- result3 += bias[ch_idx + 3];
- }
- result0 = esp_nn_multiply_by_quantized_mult_fast(result0, *out_mult++, *out_shift++);
- result1 = esp_nn_multiply_by_quantized_mult_fast(result1, *out_mult++, *out_shift++);
- result2 = esp_nn_multiply_by_quantized_mult_fast(result2, *out_mult++, *out_shift++);
- result3 = esp_nn_multiply_by_quantized_mult_fast(result3, *out_mult++, *out_shift++);
-
- result0 += out_offset;
- result1 += out_offset;
- result2 += out_offset;
- result3 += out_offset;
-
- result0 = max(result0, activation_min);
- result1 = max(result1, activation_min);
- result2 = max(result2, activation_min);
- result3 = max(result3, activation_min);
-
- result0 = min(result0, activation_max);
- result1 = min(result1, activation_max);
- result2 = min(result2, activation_max);
- result3 = min(result3, activation_max);
-
- out_data[out_idx++] = result0;
- out_data[out_idx++] = result1;
- out_data[out_idx++] = result2;
- out_data[out_idx++] = result3;
- }
- for (; ch_idx < channels; ch_idx++) {//channel_loop
- int32_t result = 0;
-
- for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
- const int32_t idx_y = base_y + filter_y_idx;
- for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
- const int32_t idx_x = base_x + filter_x_idx;
- int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx;
- int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels) + ch_idx;
- int32_t input_val = input_data[input_index] + input_offset;
- int32_t filter_val = filter_data[filter_index];
- result += input_val * filter_val;
- }
- }
- if (bias) {
- result += bias[ch_idx];
- }
- result = esp_nn_multiply_by_quantized_mult_fast(result, *out_mult++, *out_shift++);
- result += out_offset;
- result = max(result, activation_min);
- result = min(result, activation_max);
-
- out_data[out_idx++] = result;
- }
- }
- }
-}
-
-void esp_nn_depthwise_conv_s8_opt(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
- const int8_t *filter_data,
- const int32_t *bias,
- const data_dims_t *output_dims,
- int8_t *out_data,
- const dw_conv_params_t *conv_params,
- const quant_data_t *quant_data)
-{
- const uint16_t ch_mult = conv_params->ch_mult;
- if (ch_mult == 1) {
- esp_nn_depthwise_conv_s8_ch_mult_1(input_dims, input_data, filter_dims, filter_data,
- bias, output_dims, out_data, conv_params, quant_data);
- return;
- }
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t channels = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
-
- int out_idx = 0;
- for (int out_y = 0; out_y < out_ht; out_y++) { //height loop
- const int16_t base_y = (out_y * stride_ht) - pad_ht;
- for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop
- const int16_t base_x = (out_x * stride_wd) - pad_wd;
-
- const int32_t *out_shift = quant_data->shift;
- const int32_t *out_mult = quant_data->mult;
-
- /* Select filter so as the point doesn't lie outside block */
- int filter_y_start = max(0, -base_y);
- int filter_x_start = max(0, -base_x);
- int filter_y_end = min(filter_ht, input_ht - base_y);
- int filter_x_end = min(filter_wd, input_wd - base_x);
-
- for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop
- int ch_mult_idx = 0;
- for (; ch_mult_idx < ch_mult - 3; ch_mult_idx += 4) {
- int32_t result0 = 0;
- int32_t result1 = 0;
- int32_t result2 = 0;
- int32_t result3 = 0;
- const int out_ch_idx = ch_idx * ch_mult + ch_mult_idx;
-
- for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
- const int32_t idx_y = base_y + filter_y_idx;
- for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
- const int32_t idx_x = base_x + filter_x_idx;
- int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx;
- int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx;
- int32_t input_val = input_data[input_index] + input_offset;
- int32_t filter_val0 = filter_data[filter_index + 0];
- int32_t filter_val1 = filter_data[filter_index + 1];
- int32_t filter_val2 = filter_data[filter_index + 2];
- int32_t filter_val3 = filter_data[filter_index + 3];
- result0 += input_val * filter_val0;
- result1 += input_val * filter_val1;
- result2 += input_val * filter_val2;
- result3 += input_val * filter_val3;
- }
- }
- if (bias) {
- result0 += bias[out_ch_idx + 0];
- result1 += bias[out_ch_idx + 1];
- result2 += bias[out_ch_idx + 2];
- result3 += bias[out_ch_idx + 3];
- }
- result0 = esp_nn_multiply_by_quantized_mult_fast(result0, *out_mult++, *out_shift++);
- result1 = esp_nn_multiply_by_quantized_mult_fast(result1, *out_mult++, *out_shift++);
- result2 = esp_nn_multiply_by_quantized_mult_fast(result2, *out_mult++, *out_shift++);
- result3 = esp_nn_multiply_by_quantized_mult_fast(result3, *out_mult++, *out_shift++);
-
- result0 += out_offset;
- result1 += out_offset;
- result2 += out_offset;
- result3 += out_offset;
-
- result0 = max(result0, activation_min);
- result1 = max(result1, activation_min);
- result2 = max(result2, activation_min);
- result3 = max(result3, activation_min);
- result0 = min(result0, activation_max);
- result1 = min(result1, activation_max);
- result2 = min(result2, activation_max);
- result3 = min(result3, activation_max);
-
- out_data[out_idx++] = result0;
- out_data[out_idx++] = result1;
- out_data[out_idx++] = result2;
- out_data[out_idx++] = result3;
- }
- for (; ch_mult_idx < ch_mult; ch_mult_idx++) {
- int32_t result = 0;
- const int out_ch_idx = ch_idx * ch_mult + ch_mult_idx;
-
- for (int filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
- const int32_t idx_y = base_y + filter_y_idx;
- for (int filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
- const int32_t idx_x = base_x + filter_x_idx;
- int32_t input_index = (idx_y * input_wd + idx_x) * channels + ch_idx;
- int32_t filter_index = (filter_y_idx * filter_wd + filter_x_idx) * (channels * ch_mult) + out_ch_idx;
- int32_t input_val = input_data[input_index] + input_offset;
- int32_t filter_val = filter_data[filter_index];
- result += input_val * filter_val;
- }
- }
- if (bias) {
- result += bias[out_ch_idx];
- }
- result = esp_nn_multiply_by_quantized_mult_fast(result, *out_mult++, *out_shift++);
- result += out_offset;
- result = max(result, activation_min);
- result = min(result, activation_max);
-
- out_data[out_idx++] = result;
- }
- }
- }
- }
-}
diff --git a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c b/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c
index 9167a43f..c588c48f 100644
--- a/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c
+++ b/code/components/esp-nn/src/convolution/esp_nn_depthwise_conv_s8_esp32s3.c
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include
#include
-#include
#include
@@ -353,59 +353,17 @@ void esp_nn_depthwise_conv_s8_ch_mult1(const int8_t *input_data,
}
}
-int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
- const data_dims_t *filter_dims,
- const data_dims_t *output_dims,
- const dw_conv_params_t *conv_params)
+int esp_nn_get_depthwise_conv_scratch_size_esp32s3(const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const uint16_t ch_mult,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht)
{
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t channels = input_dims->channels;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t ch_mult = conv_params->ch_mult;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
-
int filter_size = filter_wd * filter_ht * channels * ch_mult;
- int pad_width = 0, pad_height = 0;
-
- if ((ch_mult == 1) && (channels % 8 == 0) && (filter_wd == 3) && (filter_ht == 3)) {
- if (channels % 16 == 0) {
- if (pad_wd || pad_ht) {
- pad_width = pad_wd * 2;
- pad_height = pad_ht * 2;
- } else {
- // check if we need to pad additionally
- pad_width = (out_wd * stride_wd + filter_wd - 1) - input_wd;
- pad_height = (out_ht * stride_ht + filter_ht - 1) - input_ht;
- // printf("in(%d %d %d), out(%d %d), filter (%d %d) stride (%d %d), pad (%d %d)",
- // input_wd, input_ht, channels, out_wd, out_ht, filter_wd, filter_ht,
- // stride_wd, stride_ht, pad_wd, pad_ht);
- }
- if (pad_width || pad_height) {
- int input_size = (input_wd + pad_width) * (input_ht + pad_height) * channels;
- // printf("ask1 %d\n", filter_size + input_size + 16);
- return filter_size + input_size + 16; // 16 for alignment
- } else {
- // printf("ask2 %d\n", filter_size + 16);
- return filter_size + 16; // 16 for alignment
- }
- } else {
- int input_size = input_wd * input_ht * channels;
- // printf("ask3 %d\n", 2 * (filter_size + input_size) + 16);
- return 2 * (filter_size + input_size) + 16; // 16 for alignment
- }
- } else if (ch_mult % 4 == 0) {
- int input_size = input_wd * input_ht * channels;
- // printf("ask4 %d\n", 2 * (filter_size + input_size) + 16);
- return 2 * (filter_size + input_size) + 16; // 16 for alignment
- }
- return 32; // just few bytes
+ int padding_used = ((filter_wd == 3) && (filter_ht == 3)) * 2;
+ int input_size = (input_wd + padding_used) * (input_ht + padding_used) * channels;
+ return 2 * (filter_size + input_size) + 16; //16 for alignment
}
void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(void *buf)
@@ -418,38 +376,29 @@ void esp_nn_set_depthwise_conv_scratch_buf_esp32s3(void *buf)
* Assumption 2: Pointers are valid
* Assumption 3: dialation width = 1
*/
-
-
-
-void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims,
- const int8_t *input_data,
- const data_dims_t *filter_dims,
+void esp_nn_depthwise_conv_s8_esp32s3(const int8_t *input_data,
+ const uint16_t input_wd,
+ const uint16_t input_ht,
+ const uint16_t channels,
+ const int32_t input_offset,
+ const uint16_t pad_wd,
+ const uint16_t pad_ht,
+ const uint16_t stride_wd,
+ const uint16_t stride_ht,
+ const uint16_t ch_mult,
const int8_t *filter_data,
+ const uint16_t filter_wd,
+ const uint16_t filter_ht,
const int32_t *bias,
- const data_dims_t *output_dims,
int8_t *out_data,
- const dw_conv_params_t *conv_params,
- const quant_data_t *quant_data)
+ const uint16_t out_wd,
+ const uint16_t out_ht,
+ const int32_t out_offset,
+ const int32_t *out_shift,
+ const int32_t *out_mult,
+ const int32_t activation_min,
+ const int32_t activation_max)
{
- const uint16_t input_wd = input_dims->width;
- const uint16_t input_ht = input_dims->height;
- const uint16_t channels = input_dims->channels;
- const int32_t input_offset = conv_params->in_offset;
- const int32_t out_offset = conv_params->out_offset;
- const uint16_t pad_wd = conv_params->padding.width;
- const uint16_t pad_ht = conv_params->padding.height;
- const uint16_t stride_wd = conv_params->stride.width;
- const uint16_t stride_ht = conv_params->stride.height;
- const uint16_t filter_wd = filter_dims->width;
- const uint16_t filter_ht = filter_dims->height;
- const uint16_t out_wd = output_dims->width;
- const uint16_t out_ht = output_dims->height;
- const int32_t *out_shift = quant_data->shift;
- const int32_t *out_mult = quant_data->mult;
- const int32_t activation_min = conv_params->activation.min;
- const int32_t activation_max = conv_params->activation.max;
- const uint16_t ch_mult = conv_params->ch_mult;
-
int filter_size = filter_wd * filter_ht * channels * ch_mult;
int align_len = 16 - (filter_size & 15);
int input_size = input_wd * input_ht * channels;
@@ -474,27 +423,18 @@ void esp_nn_depthwise_conv_s8_esp32s3(const data_dims_t *input_dims,
stride_wd, stride_ht, filter_aligned, bias,
out_data, out_wd, out_ht, out_offset, out_shift,
out_mult, activation_min, activation_max);
- } else if ((channels % 16 == 0) && (pad_wd == 0) && (pad_ht == 0)) {
+ } else if ((pad_wd == 0) && (pad_ht == 0) &&
+ // because this does not handle padding offset cases yet, run just for stride (1, 1).
+ // end padding of input with `-input_offset` should solve this
+ (stride_wd == 1) && (stride_ht == 1)) {
/* process in 8 bits */
int8_t *filter_aligned = (int8_t *) scratch_buffer;
- int8_t *input_padded = (int8_t *) scratch_buffer + filter_size + align_len;
-
- // check if we need to pad additionally
- int pad_right = (out_wd * stride_wd + filter_wd - 1) - input_wd;
- int pad_bottom = (out_ht * stride_ht + filter_ht - 1) - input_ht;
- if (pad_right || pad_bottom) { // pad right and bottom
- esp_nn_aligned_s8_pad_end_with_value(input_data, input_padded, input_wd, input_ht,
- channels, -input_offset, pad_right, pad_bottom);
- } else {
- input_padded = (int8_t *) input_data;
- }
memcpy(filter_aligned, filter_data, filter_size);
- esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(input_padded, input_wd + pad_right,
- input_ht + pad_bottom, channels, input_offset,
- stride_wd, stride_ht, filter_aligned, bias,
- out_data, out_wd, out_ht, out_offset, out_shift,
+ esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3(input_data, input_wd, input_ht, channels, input_offset,
+ stride_wd, stride_ht, filter_aligned,
+ bias, out_data, out_wd, out_ht, out_offset, out_shift,
out_mult, activation_min, activation_max);
- } else { /* (channels % 8) == 0 */
+ } else { /* (channels % 8) == 0 && pad_wd == 1 && pad_ht == 1 */
esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input_data, input_data16, input_size, input_offset);
esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3(input_data16, input_wd, input_ht, channels,
diff --git a/code/components/esp-nn/test_app/sdkconfig.defaults.esp32s3 b/code/components/esp-nn/test_app/sdkconfig.defaults.esp32s3
deleted file mode 100644
index 1adc4b01..00000000
--- a/code/components/esp-nn/test_app/sdkconfig.defaults.esp32s3
+++ /dev/null
@@ -1,8 +0,0 @@
-# Default configurations for ESP32-S3
-
-CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240=y
-CONFIG_ESP32S3_SPIRAM_SUPPORT=y
-
-CONFIG_ESP32S3_DATA_CACHE_64KB=y
-CONFIG_ESP32S3_DATA_CACHE_8WAYS=y
-CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
diff --git a/code/components/esp-nn/tests/src/basic_math_test.c b/code/components/esp-nn/tests/src/basic_math_test.c
index 715d7c78..5b96b990 100644
--- a/code/components/esp-nn/tests/src/basic_math_test.c
+++ b/code/components/esp-nn/tests/src/basic_math_test.c
@@ -23,9 +23,7 @@
#include "test_utils.h"
#if CONFIG_IDF_CMAKE
-#if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC))
#define IDF_HEAP_CAPS 1
-#endif
#if IDF_HEAP_CAPS
#include "esp_heap_caps.h"
@@ -140,11 +138,6 @@ void esp_nn_add_elementwise_s8_test()
out_c_orig = out_data_c;
out_opt_orig = out_data_opt;
#endif
- if (input1_orig == NULL || input2_orig == NULL || out_c_orig == NULL ||
- out_opt_orig == NULL) {
- printf(ANSI_COLOR_RED"%s error allocating buffers\n"ANSI_COLOR_RESET, __FUNCTION__);
- goto elementwise_add_test_cleanup;
- }
for (int i = 0; i < size; ++i) {
input1[i] = rand() % 256 - 128;
@@ -201,10 +194,10 @@ elementwise_add_test_cleanup:
if (input2_orig) {
free(input2_orig);
}
- if (out_c_orig) {
+ if (out_data_c) {
free(out_c_orig);
}
- if (out_opt_orig) {
+ if (out_data_opt) {
free(out_opt_orig);
}
}
@@ -289,11 +282,6 @@ void esp_nn_mul_elementwise_s8_test()
out_c_orig = out_data_c;
out_opt_orig = out_data_opt;
#endif
- if (input1_orig == NULL || input2_orig == NULL || out_c_orig == NULL ||
- out_opt_orig == NULL) {
- printf(ANSI_COLOR_RED"%s error allocating buffers\n"ANSI_COLOR_RESET, __FUNCTION__);
- goto elementwise_mult_test_cleanup;
- }
for (int i = 0; i < size; ++i) {
input1[i] = rand() % 256 - 128;
@@ -345,10 +333,10 @@ elementwise_mult_test_cleanup:
if (input2_orig) {
free(input2_orig);
}
- if (out_c_orig) {
+ if (out_data_c) {
free(out_c_orig);
}
- if (out_opt_orig) {
+ if (out_data_opt) {
free(out_opt_orig);
}
}
diff --git a/code/components/esp-nn/tests/src/convolution_test.c b/code/components/esp-nn/tests/src/convolution_test.c
index c86bdbab..f3802257 100644
--- a/code/components/esp-nn/tests/src/convolution_test.c
+++ b/code/components/esp-nn/tests/src/convolution_test.c
@@ -22,9 +22,8 @@
#include "test_utils.h"
#if CONFIG_IDF_CMAKE
-#if (CONFIG_SPIRAM_SUPPORT && (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC))
#define IDF_HEAP_CAPS 1
-#endif
+
#if IDF_HEAP_CAPS
#include "esp_heap_caps.h"
#endif
@@ -45,8 +44,8 @@ void esp_nn_depthwise_conv_s8_test()
uint16_t filter_ht, filter_wd, ch_mult;
uint16_t pad_wd, pad_ht, stride_wd, stride_ht;
- // run for 15 iterations
- for (int itr = 0; itr < 15; itr++) {
+ // run for 10 iterations
+ for (int itr = 0; itr < 10; itr++) {
/* prepare data */
switch (itr) {
case 0: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0)
@@ -145,52 +144,22 @@ void esp_nn_depthwise_conv_s8_test()
stride_wd = 2;
stride_ht = 2;
break;
- case 8: // same as case 7, with large parameters
- input_wd = 58;
- input_ht = 58;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 128;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 2;
- stride_ht = 2;
- break;
- case 9: // (ch_mult 1, (channels % 16) = 0), filter (3,3), pad (0,0) stride (2,2)
- input_wd = 6;
- input_ht = 6;
- filter_ht = 3;
- filter_wd = 3;
- ch_mult = 1;
- channels = 16;
- pad_wd = 0;
- pad_ht = 0;
- stride_wd = 2;
- stride_ht = 2;
- break;
default:
- input_wd = 6;
- input_ht = 6;
+ input_wd = 4;
+ input_ht = 4;
filter_ht = 3;
filter_wd = 3;
- ch_mult = 1;
- channels = 16;
- stride_wd = rand() % 2 + 1;
- stride_ht = stride_wd;
- pad_wd = stride_wd == 1 ? 0 : rand() % 2;
- pad_ht = pad_wd;
- printf("stride(%d), pad (%d)\t", stride_wd, pad_wd);
+ ch_mult = 4;
+ channels = 4;
+ pad_wd = 1;
+ pad_ht = 1;
+ stride_wd = 1;
+ stride_ht = 1;
break;
}
uint16_t out_wd = (input_wd - filter_wd + 1) / stride_wd;
uint16_t out_ht = (input_ht - filter_ht + 1) / stride_ht;
- if (itr == 9) {
- // expect the function to handle this gracefully
- out_wd += 1;
- out_ht += 1;
- }
int in_size = input_wd * input_ht * channels;
int out_size = out_wd * out_ht * channels * ch_mult;
int filter_size = filter_wd * filter_ht * channels * ch_mult + 4;
@@ -241,16 +210,9 @@ void esp_nn_depthwise_conv_s8_test()
out_mult[i] = 0x7eb0e200 + rand() % 50;
}
- data_dims_t input_dims = {.width = input_wd, .height = input_ht, .channels = channels, 1};
- data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = channels * ch_mult, 1};
- data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0};
- dw_conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset, .ch_mult = ch_mult,
- .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht},
- .dilation = {0, 0}, .activation = {activation_min, activation_max}};
- quant_data_t quant_data = {.shift = out_shift, .mult = out_mult};
-
- int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size(&input_dims, &filter_dims,
- &output_dims, &conv_params);
+ int scratch_buf_size = esp_nn_get_depthwise_conv_scratch_size(input_wd, input_ht,
+ channels, ch_mult,
+ filter_wd, filter_ht);
if (scratch_buf_size > 0) {
#if IDF_HEAP_CAPS
scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
@@ -272,8 +234,11 @@ void esp_nn_depthwise_conv_s8_test()
}
/* C function */
- esp_nn_depthwise_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 4,
- bias + 1, &output_dims, out_data_c, &conv_params, &quant_data);
+ esp_nn_depthwise_conv_s8_ansi(input, input_wd, input_ht, channels, input_offset,
+ pad_wd, pad_ht, stride_wd, stride_ht, ch_mult,
+ filter_data + 4, filter_wd, filter_ht,
+ bias + 1, out_data_c, out_wd, out_ht, out_offset, out_shift,
+ out_mult, activation_min, activation_max);
if (itr == 0) {
profile_c_end();
@@ -281,8 +246,11 @@ void esp_nn_depthwise_conv_s8_test()
}
/* Optimized function */
- esp_nn_depthwise_conv_s8(&input_dims, input, &filter_dims, filter_data + 4,
- bias + 1, &output_dims, out_data_opt, &conv_params, &quant_data);
+ esp_nn_depthwise_conv_s8(input, input_wd, input_ht, channels, input_offset,
+ pad_wd, pad_ht, stride_wd, stride_ht, ch_mult,
+ filter_data + 4, filter_wd, filter_ht,
+ bias + 1, out_data_opt, out_wd, out_ht, out_offset, out_shift,
+ out_mult, activation_min, activation_max);
if (itr == 0) {
/* disable profiler */
@@ -511,16 +479,8 @@ void esp_nn_conv_s8_test()
out_mult[i] = 0x7f67f4f8 + rand() % 50;
}
- data_dims_t input_dims = {.width = in_wd, .height = in_ht, .channels = in_channels, 1};
- data_dims_t output_dims = {.width = out_wd, .height = out_ht, .channels = out_channels, 1};
- data_dims_t filter_dims = {.width = filter_wd, .height = filter_ht, 0, 0};
- conv_params_t conv_params = {.in_offset = input_offset, .out_offset = out_offset,
- .stride = {stride_wd, stride_ht}, .padding = {pad_wd, pad_ht},
- .dilation = {0, 0}, .activation = {activation_min, activation_max}};
- quant_data_t quant_data = {.shift = out_shift, .mult = out_mult};
-
- int scratch_buf_size = esp_nn_get_conv_scratch_size(&input_dims, &filter_dims,
- &output_dims, &conv_params);
+ int scratch_buf_size = esp_nn_get_conv_scratch_size(in_wd, in_ht, in_channels,
+ out_channels, filter_wd, filter_ht);
if (scratch_buf_size > 0) {
#if IDF_HEAP_CAPS
void *scratch_buf = heap_caps_malloc(scratch_buf_size + 32, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
@@ -542,8 +502,11 @@ void esp_nn_conv_s8_test()
}
/* C function */
- esp_nn_conv_s8_ansi(&input_dims, input, &filter_dims, filter_data + 2,
- bias, &output_dims, out_data_c, &conv_params, &quant_data);
+ esp_nn_conv_s8_ansi(input, in_wd, in_ht, in_channels, input_offset,
+ pad_wd, pad_ht, stride_wd, stride_ht,
+ filter_data + 2, filter_wd, filter_ht, bias,
+ out_data_c, out_wd, out_ht, out_channels, out_offset, out_shift,
+ out_mult, activation_min, activation_max);
if (itr == 0) {
profile_c_end();
@@ -551,8 +514,11 @@ void esp_nn_conv_s8_test()
}
/* Optimized function */
- esp_nn_conv_s8(&input_dims, input, &filter_dims, filter_data + 2,
- bias, &output_dims, out_data_opt, &conv_params, &quant_data);
+ esp_nn_conv_s8(input, in_wd, in_ht, in_channels, input_offset,
+ pad_wd, pad_ht, stride_wd, stride_ht,
+ filter_data + 2, filter_wd, filter_ht, bias,
+ out_data_opt, out_wd, out_ht, out_channels, out_offset, out_shift,
+ out_mult, activation_min, activation_max);
if (itr == 0) {
/* disable profiler */
diff --git a/code/components/esp-nn_20220724.zip b/code/components/esp-nn_20220724.zip
new file mode 100644
index 0000000000000000000000000000000000000000..2bac74982945cde0a010b467676fcff7de408cfd
GIT binary patch
literal 132850
zcmb5VV{|25+cg^7wr$%T+qSu5+Z{XU*tTt_W1AhPWAn@FbHCqs&N!$3?Ap858head
zv(}u~ta&NQfP$d`0Rce)p-SGU`iG*&>j47+je-CHA%FjB=IlTZ05CFGSh-kP0PLO2
zVw9)khlr4RI8S-w@0U$a`zcr3G9U7^OmGz_;9(acTLSuO^biF)e^@nt;_>xee@w!~
z=j&FAw^${r;B#PeaEjnsIUiXiQea#0+bFH6o$E#H0U}$^&uDP+Eb!n}B|Fd9EV2t{
z>B;uh5W3jbzC=j)BP7RJRs!ft$}>R=se|9Vzo
zy#gjWL_rE{c%oxo-FbzZ5dg{2eJOO%KC
zN70TZ-`~Oy4RGA+-MCg2XDH1q#_23x7AFxflyQYj=tg{05Iw+S{B$piyfKaFwMpu6|3@wFWMF$Aj>4!oyw^1XBFRT11QBT*Q8_LnJs5
z5c)sY!q&)`-o%Q*)6UjcRmXmv3C&MpUl6a}8ZjlyMQ6BzPxVlJ!mu#NR=HSOogsWT
zVKw1zgZjebhQ%>RU_;N^&dYQfR}-gI=kGE{JRQ^m&Fs}nFz@=8
zKsr4CiO-4VML4$}T4`51DiE+f2%pFS|LZaloPJW9WZvIE7Sj@QoJ72KgMQ~p!b
zOWIst>EQT)SPCjeEzG*Dz$aq0i5@o$yWFpmgk9QHkLypQI+QEJ2pl*F?`XEm>mo&-
zU*L41vEs${(ZCVX^(m+kRn8eR!EpJk73sy9M6u<{HXZ$#CdH1lzW4j9XGOwL5aBSC
z@pi@(fq$-7TxIjX3SB9TpM4x11B{z6gt)R|C_k%KN*Crpbv5A9hE*#9!U6{_&6axi
zZF!JP$c)(_QyqvC$O7TyaurGlXgT0r-NO)qiWyZBc=KHzlH?0x-xCDSic{(4Q<;a;
z`f&?K
z=~d!4ghlFMR<%5sk&F^j_YebFyRvl#1@&2{!m*$fG?w
zY{{qf8U@*kM1o8@)|#V7Fl`rxOqJGLKPv8SAgfa(OvANqRuv&O3=@wOF_)xjYcn&^
zL?ebW5g=hhL@X&gCmYq-?ZNP59ogY_=lX4jhU>Kvx_r>i-$|KdOx6$38`CRLXcSiW6iYk^KG5D$QA@lo2&nNJJX2I4^mdd@VQ=)S!AB87Vr~*hyG|Y$-zk&m!=6TNe}@a$2o_oe=(z0+)qVL3Dh!
zp9FF;6`dvJUZif27z=a@_}=v@frmP@MYJIfA*LxRD1L<^pRjc^i<2NVR|Cb}THzCz
zfH+j!=K%KGv};XqxD;2h70m;))WmsVu7EjgSsN?Hbc{X6?P8haq#EuD?=>%M*^s{)
zrE3#pD7>`90}EzrNLwwXhJ7pmu3Ihx%XK4ssl$hoBuPGo_)iel0c^Xxzd`s21_Xrpe-HkiYXAT+SjMRK
z12TpF9fr|k#dKXc8Pf)HdAX4_q!
zcjmS`;hwbaks8f`r^EOASv}iAWyK=Zp3vj+xSN(-a@G_T88^tACB&OWQ3s?ut%#e~
zc%dU8JW6>WDx_eEhc0jl%swK;p0tKM?ExyV*J1ITUdLQ+c96I&pmR7UY$(txinkEJ
zgwDwjZ2?_dV_~cbq_*wldijCo`;r6@iEegSd)Q`@9Vu^;hDTCuAB!Vntv$V40LsaT
zoTiG~OoFP9OiO|hDG&L9-1$q-~J2V6LqrMIUp7JkWp!88N4`C3c{r)$ih{Sr6_PF`;Lp_+Merxn0
zD>RBa-F_cz#sW6&5tq&oBEJAcr7kScL`PL9!Y!X_MS$FJfjg5hACgi_Hx)heG5Jh6
zuaaw(NZGo585lb_%2JpOuf7}#?C8QZJm+Ma`;2y>@~8x*a=rCnSuNgHwp(>yA}pP}
z)VrBW%CKWgmt0}o+_3|*QxgvEBz(JFUP_qtNl%zesVx>u;jmdML}ew@)W9D%4mtCX
zlQm+^soJ+zEtX(TG$EzyTLUykWIR*EV^oFI!-mr)-`Ke8xpumb4U7QJRt6>vmZ7Rj_L)pbezl8V4ioz8H1K+a
zCDz_rspd4=VkIIP57bVRz}tO~wx|PsAJN2mC?e+2ByVq*omZE5e2C^E%aO(Dfg70{
znKo};h|cur)>9}@Hg-wf)x&P+JP&T0qTLX0#t)_rABdBg!(+
zIj}dH;cM*%4(*jGv0bag`Pnp0rKQ!&T`ojLR4x*QJ;zmx#u{B}GE2gYu5{vJToPkk
zIe*a%IA2)qm8qK|#S>pZdUpUbD9I;`&7O!Ndvv*!00Itg@mRv4i|E#E%+bDQ6XtOdh>{=AoU4MvQKwx`_df
zqSo)is&7X>B2Q@n&N|fAsBguQLawKmo!gJ@YWl>7vv4
zOT%2cX(1`pUIsnuagUj1Y~{fAPAu}M%w)X~fTqN;{3;BYaz%w|!bd1S&IN`)X7w|L
zWi&si)}73&FMdFH(|?sZBUS{RnN7#~@tDn{%gt9(FJ+wiA~|
z66dy>iBa&P$+wemKgqQtS>#XW%EiuH-v>$N;IHh$d|9dVV^eun
zCP@HGywU4DZoCFW2nR5N*>#kwS4zDhgMY%hvRr{{@Eg)O>_9+d|L?H2G&3?ab8=>|
z{Htwezrlg@sh{_?7w;>jOex=00Na56E3>4sev;bSU5ptHgkA}QIelJ~rsnP3-6w%=
zt$}`=U>SkikoM2Dz6T@Xm8ff{1L|;8h#V+%chvSU_U9D}KrBm<_!nYfXuJ!>t_K>D
z06Y|Lp!d7$>4>DqOv`{8s0a)tU8HN~gCIkRm)DQ7C8KY^_JajGQW9>0YTCxnu
zpmS)aq31uC8XyVndyyd22j>8Hf!thFyL-=M+0J||x(@f?%#FR1M>zPIHGyj+Ur^`|
zUNdNbcLNYgp&3JZUN>?2k7S^93bZc@QnMF6qr@94NYOvPxuwcd515HjIi$?SUDLDM
zzV<;}2=*Kn3HuFQz-nI$`^yT8eej}pw*9dNM?c@5KK48uc!S=a9GnGx_^}W#!=LXV
zUI!Jf&kdhDK+*8SBTp>w4a}%iEt3_!0z{LSLJtCP{tzapM}*d}hn1VSP{T^eRqVUdm{AkT9{t?BhsM1yf~1Sbse6lLP6+VP`1PLDs;4HbOh!!cf4uemJ_Vb)NN^
zU4;ueXG)WS9LabP8A*qx2DSgtvvTql*(*(RF!KFCVxAEn>W|?Gjp5y&Al9ujX%d#~
z#N=9PG2kIc0BgjyRs{F2Men_Z*Zdgq{7ZiSnCfskIq!h@xt;T#78~pA4cyHS7P6QY
zv)Xu_IbSxex*SpU1}0bE`q*X+XxL)m6lU_8T|Jw`+R*`f*^~}3G$<43bSKWaMOi34
zXbtri?+z8@i$3osxO_x@d$wv0~!Mk5T?YH5o3Bn10sGJ<2iD0u4QdYQv+v?onwa{F3B#G{v_D
zD>XWJnIc~!HfqSiVmRd_sCo-6D}l;${#h;`l`(N4a~ca_wC6}?@N8=<;OZ>nH3p=k{#u7iz9(lAN-!JW&Ttj{*sQ2o`%Go
zZSF3qL5F-WhdU=v3&{F*^xNMBJ;-cx>b&hs8$xEs7wUOVTv3c8IM8Dz8KTaVMZAq;
z%4^RV2M&P~%Q~MJM;0h<)63vA0cOrgQ738LUsaWDwzt6B_pSa~y(|2}tef>s@O~Uk
z2)&A6zO2!}(|Q7O_e~&K$}>JG1w9i4F^<(iNfmA4PRc2A~-*kithbU4?5RKV`BgCZ{T<
z{J>jciaRT1I=KJ1{yCBfy8zfhBQ0^i^!B)E?(5vS7t&8uiKun2l-PMynd>QqT0Q9Z
zaaiU@BeYJr1{>Kq{%*Mw=mFq2P3(>0zJ5K1`Q^UqD@REim;sdTDoHZb{K4Jn@iY%@
z0-C=v#WBM
z1966VXa~#2MnzmdWvOyPJRai}cN{AM@4EJjsH1g@k=y5hl^~9IKsC|}N4nI^SNQ=c
zD=WWRCg2{zs^J@#1^+1mw0`7#(Sia2-FyoHyniVIOwG){$-i4|$7Vwk$q(_5Kh}KO
z&~w#(Pxh7c)=+UtKR3yfbnc`yZz4odN(XPGvGnItrrwuZ*1QOJAo6vOW`>0b)3lV~p4_Rj|h?7A<6&W4^3DRY#Ej5o%lZPFAj1!dE|iu_d0hWsEqi
zE>SdY%7jh#Lb(4NPX#<2WbNKOq;6F!Lyb7FiB_v(Ir*=Qd{-)>KSYHnz=SHcSYtxk
z8gfz-MNs&T$SBoD5Ws)Vaf;+MrMePPV|>is67oD`HU+_*?<~+<^*9v+@AFzbRcnIu
zTzI>4Cilk)MSQ$38mWhI>z=F0X&aKr%fx>b(;)X0pXIgWIYl*$Hd}0K8MAm2>SjBZ
z(m~+K#nO}06Kb~ZQNKQc4)K}o8siPJ^#nW}c;GyNPNbc_A8Z|S`(W?v#97>(l7)48
zxAkOh$sR9sru+B?++5K5WK08I1B|oC21XSP6q38j%QO&1enOHpc!^E0|56q&FJtwr
zbPDP66S5ISW3!}n>gkfFItx%y7OUc_;!mO|pPtcYS3gWa(W4HtXR%td4(Y|SVd10N
zemyK(;S~%f=#rr`g4mbHbh4SlrM-c+9F~r*(1n7vYlM{^ZUFQBaY_^_yWm84mU{Si
z{1wBb>9cAvH4XmEE)}TzdiGimc
zYq0%A0kK0Or7Vnm|7aILnOb30#q>;$n-2yb&)$?`?d0lsFP!L`4M0QS8B+T2z4pXs
zJ&R|mgalRX3|V#J9*Jkq#d^&EEObT}BcyOM2ovRMI%}F9zrCKIr+K0Z4De@=x7`#;
zR)-vgvNf2NO2_rbQmS$$3N0Jm6dlGybmctfMu!ClIzK%HnNA1|Uc_lAnS_^@fPrEY
z?{iQ=%8w1^`z?da@D5vU-OAU)_188`Jc`D$#>qWq#yPLH`syY~@|c-a++s7!JB#y-
z3B|XE5x>1h9#YM^(YsRq+1nXK7c$JHnU4I<;VULn&fO@*L8kHZ*gM!iSE{@LGQ!E8
zmv$g~R{<#)8eh5f;fcYm|KpDQhlPI)WA+sY0R+@W4Fp8^FCVgR--DIKnf2d9_W8ew
z?5}T;JxKhPSQ3lRD}-rETgN7+WTL0Mco}Jo1*Tm$d~Tj-w1!^}
z>Q7zn8mf4fp34V}D6url(oVkgN0C7~FT$`^_ou>6XBQvxM{XEqj`&%BmDJq^N*N+%
zucJihSKLJKYp^l4d=CYu!xs{xM6m?~sLS8rQf15kDY1!N(sSEB`9LA!L>-)1gZ56q
zwl|~03S*aV&A9x&u07!l00O=p?SzO|;@8*E0K&^1gwerp?=CM$&L@Mtr@fC21VuPJ
zoPr$}zm!5*@DwkKAW~^q`R7e8a^o|W-1fE(%Zxw+*(7XKnr;xi`=*@u
zsb6HuUoFjSY@xCi7Bjj1*zYAtwx|5$kt>A)QNI$Ffn5d{S*I0|d6mHUQZ4s*M}`$h
zR~+{3s_TU&F1Ah%4tITApdBU2oTkup6u56xvf@vp4IT6egYhnox(+f
zxC0+o98Sppl6d?86So(yvt1#$I+B=2INE2E!g}~yVJT_BmIzyNwsz-IhL*NBM4&B}
zEaphV9QpfOfQSB@0QaD|S-J4ywbX`s8^KhKv?0oe^WcF;`#Kb&?kh8%qC~@`Q?3s4
z?4#)og8rf7pZx@>fq#PN_vwA85-8f|1MQ4bqH0O68MByDqC%H<2MX^OL6DOyEkW(uo3Vaw4txrkpXHLG)*1~B~
zVYaL-xH|dMI?rrbN?O!YRY^<>yTG+{7@4ZnVMdV>#=)(4%bupMDumR2gtL;39Tl4&
z>SI$OM)X;Enhzpg@r(H*21gJ23-q4Z&uo=6T^@YUN>YGJ{4#AY?FB<3O
zunOh2YlkFc%Q^>VYsVIFOilr0__&`qYXB;A2!tX+mU7C^tnSN}O-;MVBlXo|qcO~Z
z=ws885fcW-jWBVshzr7GQ7_m5GO!^{Wi^v(t?2$2U+~m1MIPl!U&IKqH+`@LkhK?G
z=Q4hLF$X-0S*+T1F?uZvJ;*whg3-D|^I;gS4&d<0NgkTU4a>5+-#4qWRoAixYBk#^
zLRI164x^%)hXE2<+(jKxG~FskPKU+D4=ORGJoR!0
z#Y$VW6U98Ws#|b1YnjU5&f3%<$Hrzt~F7&1{y(CyoP#>#JkXEYtnyBv-wAf=$fvZ@p>ekp~
zGNWyaNKP#KH_fu*hp33{^WwbU
zL{C1Gy?Jzn=#v?Y`X~BL?1WWVBU`zmc%PcaGUJcu1A~kxGCpfO=7Jud`UOSzU1jq|
z^QjdLcgZvs^t@f`&K=4LcD1FQao81(e{cLq@xznim!4|87-@$d{`6S<+F|H^2<*#Y
z4MOqWh#MP3oWYM!%l$)L*vX>pwzF4HCwD+`C;ll7NUNN9u4(aS2KnFYCGd%pQul3t
z>V8W^(tpWb7G?l5Co2;Jdj}T=OW$t~bp6`{&HeU3jTz|+ip~dB?HEny)e6;^*j?0e
zNe*;TY$CGP5UAfCczI`qi8Q6-eRq7Hw%jR%fm$oHZ4wSKYHDk08BQOdOG@S~qa}&8
zaifX@@2D(Tey6tuD9m1C9W%9(=oD&ppo>BiOW
zZURGO&hGA49&1+ONVh)gi0~?898#H?D#uHVNM)uDCR}140yyd$J
ztehR=N*|5#$Xen(Mip>zpcxLed5X%mrBUdlxS^^wZpqvo*?=Z}YV%ejpfc%o2Rzto3e9uMO&$b$x5l^btmIU~LO3
zKDtok`RMpO1jd4fqBZjSbRCUwUnH2Xb0Lz%u4*w&>dO<
zTB?@6+bm8B$*hCNBXHQao9fv}5YKHJR32Y2^$2lWAg|4AS)nOCrdUA1UD2RA-ju?(
zv)xe|-IUr@Z0*$V>_6z#;xzHp@LhmdfmN+nN_Sg^NMG-4<6=R`Ka5H@R++NF$NKdy
zaae{9lWj%c^(Xu!jdR{Hj3SD*M>C3&OvHkvQ(70tik_15A6md?N_L$abii6;!gC*e>Ev?DGXWKtkh3PQ<9
zq>3L-EktKG>Yrgo#V|2Bfg
z!YRQDT&Gw3bi1cy>S0~qzfBeJ34>put3wF%EY)e?PP#>n-X+uwRhSfs8*&-Z}*_^#wvyJoFIQ1`}^i@dG`kv
z*}A`0XUN(Jw{>^>dOoi4``qx}l;>8;r|OVzpC~uW``*-pe0oEkxEHF~Zz%t|815
z3~4ax`+VG2*zLG8<_hjyJvoo#PQ`5T*}t1#zF_tq6hCpg3sw&;-D-Dr=;#RgAeQfQ&x!;rhe0E}}QLEP3`tgGkQQ?LamNFB%
zHARlpO}eFEG{W1=U4Ehcq!A4ak58EU5J)$=|gxd
ze+Q@XYl>G~_&D`mXASnEvmS+Z1HKo{mU&Lt&Xg3O`5Hm+L#Ff(DHX9yDk^#){WT!j
zURebXHwcJ!5OSNlDd1<-)c1A?Q8T`DR^nxd_+R
z0!gsYG3h+1)?9cpTH#b{xM_zD+0Ev#D8tg?9%sLBFuAk
zNoc-m?0`&KF>Y3qcOJass>vDe6VFM2e3jRt01yf<^M~x#&K=JsTdlbP1LV1u9rqK|
zu7NdW$VB-+uSgih*r$TYwjeQ0(@7|;>=GW;mAXRt27AJn5ChJYP45LRI=>B
zyzU)jbD<2nWV7Zgw2Vv5pat>^&JP<$K5ab%|5FoS20NwFLnNXtQ9FU9`(6ayyDS1@
zPnxPBi-;YHmyK?JRQD6&6T$ub5Z94+nTXK8^Ts
z0Y~x^G%Uj@pt+k!pKuVE$wJxpIkaR~1u!5gb2@E_%8u<;5M>ox(wRZgavO_@@~?u}
zQ?A}U4s2w^(7xqVAZAjZ;hO30!ksBy@?6BE^!bW*8%WjayJin;$BosDTrfJT=+aga
z8csoykI?|!@L-#!Jb!?}iYjd*DZMni{25M7YAleW!bXxL7_ek?!z~{1tNd*ji*&6W
z2NP)8d?wteKwV;#8_y}yKRq(CLqz(_J5xgybC%}>h2s`9a6H6o_OP0%59?JD4@tvu
zRGc82?OYN6YQtHa<*^vorkP`A5N4Uu3^Ah|RI;`ank6FqYG6ssKatJ0TzQYF7%j3&
zVH|BjiXyUfwc;iT)n1$w2)zCH#LvhsaSo|hL0{t40f`;?OYl%sLuLnYE}!(#%VXyutT6#x=t}U9<-UDbJ6Q0j+#=3V-K~_sHd3Y!mUZWsFm+#E!OxI?49M;RTw+QU@FvlO~TLDdob%ao{MLxJOa-dPAhm65!P;jwL95ni@FHE
zSCymE*2KE!uR$qH&C-q>6ohPP!U!}~m!6~!v}{4ZktmXnD0tbu6(N~TQ_;_sc~%`AZ6Q-WO-L+i6B__2!Zt`8dVuX5=j|rP>-#fAQy@q;&AO~
zR1X#|HO7m2O8+4qF3hNm0V0_BYEUmFAIg<`uY;6$Tq%F^UD)ogVa?y-6o44#nN@3-
z#Cf=(hW9HdAZna;_u$LZrkM5TGoVVb2i9o^uoOa2eg+JES4^)5Sw-%S6C?$U?gkTi
zSnix!xgRWZ6{{0rRT0(WzwGnz#E%XuAr5I@n1-WW5|-6Ga0;2yvG&V`t4P><_%YpS
z2-&7QpR)O)E#2W{`^52IX3z=*1|Zg0hZyTOfOZc8NS1@YI8BCg9=Cc%1OoxXtAh1v
z4MZE%hg*7?5ZTF;K*-bjv98Y|`mYBFKRe)0Wocdsz!8?YD1o)6lSW4p{p96o+nw+@
z33pytGF$X4HeR`BCA-VN3Wx)8k+onboA5e{f8Au4{$?lIl|hylDgW51lS{^Q>GDjs
z1E|F%(~`_h9|3JD36&q99(kAYkM2@Y9gm!8g;+8|G0=*^RgO_Ze&iX+Lt`PFNMKvA
z@$_rIe#cS6m#9LPD&bHKlZkk!v6qhX{x&uoAv$YM#mQ;tp=14w*Wuv||Y&dDvyQW+hSG9@ZlPYGjGZ>{5NjWi4=HVZN
z3c}_VJmZ}oIZflfR=ILt5EUuSE0J=ve`hpCz#xJLS{MrpSgg5me46IjdNw7RpBW%u
zrP}0Je0k%t;!AO{bh4P6qq~?gGecwdql?cDnv7BdALlyZQ0yL}r#&SJ
ztgVj1V>4I76D^IdT6+RX6?sk0C;v_|`7q{CgrD$@Y^QllCWsI(U=6=Bql`zBi>af3
zY$z8dhnq7{SQA#=b4w|CU~+kwIMD%yEv1#!DyqZu$l8;}A|wwoM^6?}f0@vZYC*|5
za@2b~hMNC&2ItKJE8v`7a3`~XXMZG_bb3+lYYZur(!NAaL_tWZgwHetwmc6(&n5m)
zntrjNVWjNLVR^vzjid{=zGO#KaAA7sV?0EME$S|eE{CDiFim3FlE{%Sv_9HKQ&dvE
zVkPF_lGqwdFzDH1px)$u)_4CC4Pe-AE^CPOEhVVeC@&8*PhWGD-OPw~5anq+)#SkN
zc0$7-2d1-}IVg{(3_JJf_%LvF4NRyn(E8(nopYvsdbt3rnUSMjd`?osXLInzEqwsr
z8m4NX$<3K3%caI#e8-*OO<{Wo{gbeJAh|qkJ&*7Y8CQtyp1C}K1xvW!$bl(ZCCB`0
zWz|6{xgPgBHWap2Bx%!k)X5A>(OY4mk(Y|azisRekMdH%*)Z*f0H=uh4ak$^9i~}?
z?=8&)NTOI}EZVL{trwnWQ$wlW-2pzu$Bq71;M$xR_bI4OaC$PAse>fiQE6Y2Y5o#(
zYr&gHHkS%+LHj2Q=E}O`hS<|cvj<`e%mJRVIGNoFDKUnlKKs~J(W|aPnUhER
z>JvG_`kMECj$cP50XJzg7{6ECqYe={1-Iy~U~d+twkiqpkC&dJIHTTjxl_MYZyG1<
z7yFYmd|dHcypNgUAX<8^VEbJtMt6Wog+Fp
z-@vI|<2so33`;WwLRS7)3!M=>4kxEjM9$^>Wwiv+-Qs7F03m;q^@3$xld>L0&r+I8
zl>G&^51|0|8)lBo?38ff%H+FdH~bX)2y9}Tl|t?huAXASTv`GZO4m>bzH#T6qv}D!
zKs`ElC)RH2yu+LGkQ-j`73z+I8OFW@J;VObi6G0^T&x9h!+4y)%n^^-EQtN(olTV#
zLK|*99YW3t+j-*r>D}BDjy5*XjpQ~sXu|Gaab9!dxCcMnO=O$D%M+3pVD=4{?4mW&
z`mjKk+F}62Ic$pUSw22A66TomPRXnp=&&OIwYS*`iU>xMw?K2_;?Xcluc$l{npmfer(=nxjvW~(Ie_pWN{LIu58ySdOf7j_&58mL%8@TT=45t
z*v&cE^jA%V1Y@%sJfKi5teai_DrMg8lnQQ?68u#u$Kj7P03p!J#-5hxG_j0S<(=7nTk(hldq>crjIrb$0`#aiz!p@j(hpx8#Ea%Ob
zvCHw{kzQZ>{f_)4krDw&d&f(@m%XUBo+tTGtJC!>T_*Bc4YD^)97SnjohXWq=S4tX
zBGM5@(Pl|iKF^Kl%a>bo0lr-1*Rq|jjCfBPAUwaQy$^K!SShsGHW&;-VpLlO$dQ(h^_rt&
z-iy2{-{114BI9BQK^p7~WC~r2B%7
z^y+T$1+N>;9bdneN=iTT#2%U^bu8G|oy3y(?@6tH3OYSXE_XkuOUoty6rO>D0uEVd
z&II0rJPWAkzu>YcbG&D1@NxHIW1IZnzF;H(+?wRO5rP612ngwaN_J&2VNp3T20POf
z^$oxV7m_bxygwEZqZ=`8hYms=;u_m2j$M{*cBvknwRVXuNn#oLI2Xd}4et*rHvoNy
z8v`0QkA;Uh*C1+J)=sQ><3Zs;VJCdJ2p#V71yNFpd4V#K4ZU>!GmJSa)?}NNCNOJF
zYePGs2%th1Ho~plb~lh+1P45=0??*R5^hUsJ>2XB{RKKHr}tZ&R8x;BrZ)NP;u#7^
zpbx(`*|r9zJ(N+zxeY^$)X2fz)dQ#FI%NfgMZG_fw6I+L`YDX_J-1){Tsu2ErzUj!
zS8?X}ZSs?T)>)TOypj|VX%@kag!(eL#^4GdAIm#eSXIkNz3T~SWx$XP(a50&jWKN)
zfxx5i;u4X@Dxyb?4-@BBJk>JMDjA}9G3K->@rA(l84nL4_j*a(l%JsSkK(qmn|1y~@69TmC
zFiX;@k+;TQOZVO#N}~2mIum{LA%h7eq?H==fDM-5{xO;adbe?k{qD&d+T{B=#y03+
zf$Ik$b^r3TEF5sdLj3|xyHjb-SwkEV^vh2Lw6`BFeW(Bc5wUMx2Yfy
zerEv+xaDisyWFDT!$MWheeUl@621^U#g9n_Me>97oCsB@R!CtS;t9A4`pS@!_%4e>
zkBMG0*#aZaG26uqYov?H7_+|d6fvWACnMurI2-Ll&CGmInOs=q-;7$cfltj_t?yhH
z{y>Wmt()AmA=Ab6U|Z0(_j~t)F?JIz)Xo+?)l?pGE^eXi`_fswFQ?SHQa^2#tki{J
z{ooyH4{RiT2@_LZucQqy1#)PpvN4``_0sA?u$yT3oYl;}Gatys#8YT>#Hu-U(p#ht
zWv8|kjt{;L=AbOob0<>eX0>|&dVdoJtl=s~J$nI3R7*RfkkQ;28=ZNhJ!h-?!uhGe
zvHL>in$KLn3aZ`tllpp%h+X75B?eB9H7^nN_G;K&$k~ION$=wF&h}m$pGC54!HV6JaBTJSeAP<902;d-J-PDEP7EnY!Jhqbqjn7_#OGJE22rvl${P>Wc3OTJO_l@@7^$=L%6Jw
z+{m3?a%x`T)NdPtQDOG}gkyAnWO33*YeNh)Cdo}K886GL<{~&rE$@fTLCNS~5%?Qu
z=)gRLg6l#0ejwOsIPETdr`-N)!14B=o15{@(|wLlGu
z76wP|yLs8^zbw%IyNBX?OZ~4N3TG#i|L&jozXuZibD)uliT8+udgQ|1kSM+A$2A%xqo%J>$+`Vy1j$v%!S)K`{2^
zU}})a9KY(CjURw%DAfhAl&^B72Pm8i>!`?lk*yg3UP>2U5_RL)N9~
zW>rl_Fke`^|7Ac~SGS(mocyDq5y1v_un4Vz3Or`_+zXiVywZ;`@*R{jxuEHnFexkB
zu5?H{2DLwrDcsJirZS3o5;YvZ(X4RroagLnIfQjRXgMAmhE@pOA64RAmB@8qDM__z
zS`-#B%kh+aMM_p^62$DE>pcBF{n!~AFESKR?o5}p^V}Z-k?SqlKMlPKWRcap8)5+a
zzlT}0@EUJ^tY=qwa2Q@&{0Q6Fz7(=_OO2#|s}_Zw2%v&vIZAfl#&IHxV%ALQRL6qj
z!Pc^{+-TD%^nM)w=q0R7Jjin*tnID3Ty_xb?apRgxgiN$qSviS
z)wto&r}uooKKc#W7MrGFB1C|PIZ#^HyG`|yZ`_h=81y_LTJu;^HInQk-Oj7URv<*R
zNQyHq7bvch^sr&GW{lxNQ!x&WY5dhZQvBM@Nw!cij>J&)Te3syB7t!q)5g0$wTo`XRHbT4krruSrd+nAhA(GN!|c_t)WCu@(z!+
ze!8rlUbyi_l?7Cj!SX5rCU~C?GCRlbFJXe0^R662vV_D9TIdEtFd>PkA)3ix;n+!d
zC0U4!#Bb6iO=ml!p)tZKu1)FP$O`ADA{`Uzht*ZNXhnu572xC9Q3i)CVGd5gb>m77
z%Bi#8v1WP%d~*Ba<_-=lS#Wpc_V$ng1Qe|YYtKeO?!AMSx2T_33IMshF|5lUPZbp`
zvJ`7Yv|?My^U_e~WQY-^F5}C#5#_^ssUc_se^Shu&B3UrIHG@7IKerPZ;9Ftf+1U<
z>glH%VI{iA%yy5}N)8QR_5Hpgp|BJ$@XT%{Yz&oLddHPRd!GgA+~tmcc_xHFJ-mZ~i|R+)QYoqv
zf-62ABwEdjMUNN;BWIF;f1Tzc#~5%lQ{1pgR-3w+0w=rQq%~mhm{k5;X8cOwxWTM9
zM0Et$w_mK)kUYO?OWLoT%<3k;!uZa7sQ7v>+dG{5R8-~JH=))vg7%jpK=eLmHtR2X
zbd@@JtczXrX4e_#V3rT5boGj5v!?dgt_&upm;LDhgf068uTv)D99_Ns^=omlQw7unk8~m%n!PpphtAW=
z3c!E6ARq7aY}N^e)E+v!LT|UMcdztaz22Ve}aL7#oXZV7oJiK_0Dqe{2BE0=6(YXAYhK$tf(`nxE@&
zhNMXG`3L#YzJqta)tKig+y~t?9=`Hrpgm?ggv8kIof{Wa?jdk{Z+Au4|!6K
zvp$8q8b!jPcLjoL`Xc%K+ZVZV)ZZ~}J%S#iECqhXuvP|&-}^E<5-S0t`Z9Ro(YMW`H~?uNz*BAW>qR_4}&bG?O_Qq%NsWVTMI|ir@F^jR5Ih&X9WTkrAZLW0=x;JTp4V>&J
zHOHRcdt3C=kA{Yv9`R{rxDbTRd%e61Un9K-FSFKv@lgA*?nEJ8#X@_!P8n>n7{90^
zOf;Q{*w<1^GrOAp(2`_%%C+oPJkUQA{%hXH_~!;`Y~=hMsjxG0vHV{oj(})}!T(L*
zE8m3|<-eTspVVz+YWknd{Z~!T=3M-{B>N)>cN(R}>sHmHn^Ck&!N3kLAK~NOp4?DX
zoWM@>w_;rl?aArOTjD%UQhuJ|MkXIavw_WdIxBr^4I6G(`uKMcMg;iRq#*C%2GR%x
z<1AJru_jJbdZ2u~WV0lh=k>WQ9B1SCL1BO1XFq&s8JxIc1r%%@^Z->FvEXtl!FyOJ
zadqESu5hUvmsX~Hm-A@-BDtv(w6qQtJPS1TPinEga3p8gBBDjDoJ3V(5it(Oz-2aH
ziCa&(f@AfDUQ^L&d&dRTqAi%(+M)ILxBk3}iXNAOw+>CX`IJWNP1%}}1w)^Sf!4Y>
zn!|OWK>t)#hxI%1v8g}^9E^sH!*UtpBi2UNen_(yN8Sl(V958wo1E-%Fw)=csG{^ILy$=6agj{i;G
z@e7)}XMJZ$*M9_162^Yk!UP?bIS&XApe>%Xt(^G+8U1?$4t9Wxb6
z*Y0Uu5WVC(9NJDBTb&pf*yY{Se+E0L;66gPZ|2g_TLfZxY(au#xOk68L+^I|w`w2_
z2uN+OIev#`EQEVeQ=b0Ov*Aj(Sr(>KF)gLT_EL
z!Y!Nb@zofI2Kf%{^CNNZK2-I0QsJ$D{kv|;8QUU{)C
zTGW!L)^`7JbH##sZ9TAUi)cgkuTrw>mO87RC9eDKcz*Nretg<3Ewqps_6a*MAp5Q}
zJ1`)@6aD9lpD{@f3*uj}|5~*W0RMnBx}+LZG(UocQ)K{vAI-vlqV@k+wg0gK{}anx
zYyapOej|R@>IFoYaoyq4A`@6BbTltM(&cz&O4!b{?>KYeg(T`{II)oagbV2X^>zci
z1qC4xk@R#?v~o0ySks|FivsVI3M?x-`dEH15Kmi0HQ*~EzvUOzO}PwAGw~Fm!<&fQ
zO+I=Xc1fhKOL|QIeEoy!@%1OH|D5uZxNebiR;q^}%ex<1QOD`tF=uK&&hl=I-(8`!
zD!aZwE|<8UdHM0`l^f-*ej`5XDB!3n(pnOZX(3U&1e5&JcV-n;!ih<5ySj;G{4{&3
zWVz)`Oaiko+K&peN+Sip0|>_I)n%EKM4}{lWY$HchPenl&2G=z{^$1FCj0GW-O+KDUt(`w?^{1H4h`7XtrufexoPCe
z5wt-E^b9L%t*T;)
zr|?hp>QoU&V5MYLh<^j0rX{6GOV<9i&c@I39nxxEP8fSE6vzb
zg*j~34VK_RGLtTuCQ5?i`4LQBTK+a>ed>>2`Ke^|Z}xIR;t3(h!?L;dBhGoz9p>
z_PL!4_}I9T3sA6eH{@Alj^x7%Z3>y~h1G!|iIMMP8efPikUUt;Ya3^l5o)OxmX}q=
zsXaI=>6x$ZHKcr+#+QZ_q&Bs^vyvJmPkLpC>elSmhCU~w3MCbS7=gR{
zpZpcS`LYlmhhlY?UXEF>c8S-iGyp6CAG_raULuK-QEId=e(6Edl*0qjm9ksV)hU(g
z$^BV&Ir}Z`cj(wY*hq;N{z^HU|mA?xA2>k5qI!XPEpT1
zScv!p!7^#Q>l^09gee&_ok>vN7z&4#OY-JI3R)h^#R4N8NDDMXfRGV5f>li@B#?n#
zH6l51%$X6BQBQia?Ymn$nY{|WMUep5bg(ih-oe#k|CU!>iY`{&ajH>rG0Agqb$Kao
zA^cs(MPXLzkvIflfkmyG=Y1OujJC
zwR`<|NHNwfEr@DaN)rprH6auL_Ani!e~L~9%>a-axjvc(XX+e#q8|*7-Hpx`tf+Gec*cIB7H_)Cq&M?yn-N}>V4R47*-Br7y1m5#;7kB}D=mlf
zaKaGkgs2fw5nuonUB1&b)KnU8n#h6?Bo8(l0d%)B?daG-guX#4EPMSL+|Ur+esvs?
z>Ei`ru-ICk(HILm=Mg3FD@E1fJOmL=|LJa=N&UpzExT@YXq)6wjSWC?XOfFe6#lVT
z1L(m}s6_K?OgJ6Cag+fY4|Lc$R!8s`vg>vz{_l~eTt+tQ0Q_^z0@#mi#H5>uZ6h62
z*92uzgB~4{rtsvB?Udnq>B!jpF`~j}Y|JR6jK@7Fq0M+W`vaoYr^xJiLO9m0t2Jjf
zd@A~0T#ry9#URi5Xc|Bg$&B6VFb!6khQZCTpz(PfppxiKSwE@OK~47J7FfI@P8=?R
zeVUqyQ1lT^IH?N8`+;7CJbDnmd}27-gtydlO`^?=T7&NAgxWZAalTMpg4*R~>Lvr^
z3#jYxH^3cv>B{qcEN;DPQi%&$5YJTrVWOC=qqlcq1`4maPf4TH-~f`9AJB
zQ;EL>yJw97q%|_Zo4UBxBpNN}Y>l$S_O5r;>srVf?G#a*=f0UIee(|CJ-(|HRM?@C)+BZ3i3NvRc4-}>16ci^P7rr)^D~K5|OKQ
z7v5H>bdLFhw~;b>ZkvvtDe2H|upRoQm8>lGX_$twwc^h-e%Ys9hc#V6nsIx}aeQz%
zr1}VQOyaT5G+uL99XaGR$Um7gWZU%Hf6mh?KzIa*@iNi)$*eAlyc=+N=`*PBrer}T
z&66eBE+9pJ+EqwPUBv{1NeFIIB(kS4Y!yyY)(w2XnBIxhV`i9*BHoRVlVu*jdD1f{
zSfpJ^mFOtnC1+`G5`~DPaN8S3U%d
zWkQ9WW)idJsHcGS2Ue1@&@UFadPot8uber|k_1+$9j)pn+HK5w2moE7Tsd)5f
zaB-GTzu2o}R}piQX23%z<@!A~^RrVuy(8qBQu=F&A;nkFfQpwu8W$Jk=gkY$5WSiW
zAIM{X4$|bSLu>u8R(<|#*yq9Ze{6E?TL)i)o8pkog6JX4g2ZB)0+7w_sxmC{3Hjgd
zkUmJh>Iiu3e{uMxQRtQ&>|1y*#i4WsLmz`U&LnAxegky&LHc(Vx%!`3p}}*>rI~va
zj~%IJjaeQC$d}?gk3bCgnqB7~Vtp*ZaX~!Kw*bpCDSIq^pYy{IaiwdR+81;)BAhrZ
z!No7`1X`P3`^$o@=z>I1<^F~&r-}dkyOkCL4~2lE-3c80nh((5-@+{r4iAmcTF(m#
zk%vX#ef?r`7z6|YzPbXiZ`DQr(Sp{MYyS*s=BJnawE?Z$i$Bu=t^2y15dJpw39w2L
z&VL{ADF(c|d13x~_=u83D_NVQ$h+E2yS;Ge;*F=%yq;9UW-xb8*?K5SyX{)^>_nzz
zEY(Qr1Y7D3H<1=NnBKNio|5>|qd>h-AHe8=+z>=Q{m|ncu9j`;dxq0zWt#5HcQJFe
z#;9%#O`C7W^dSL8szq@~KODS;P`F~)G}$6d3IVpdo8%NEZ_
zK~%WLC_ia;SHz#44wA?Ut>St2BsMRtkZ@J9;z&7NCAxt^z
zosIc%qwmfT3s}dpeS_@S(+|C{Z#80&y+MN>U+rW`3j(#I2XpJMr3N1vAxHfIo9B1rCUMvY2q
znpy9qu3_|joJ3IhJeVehA>veX1um~ZUMpD+?x^e
z`k_R7%5r|wy!nuVj{Rv73)KX)%rmbraf7qGc%da6N1klP9S2`%P7lv;9r{aixi5~l
zw)|Tw4Oy{xk~5bax&PNo0F(Mk^aMYyPE{6bPnl4+NU?OKMZ?BX)VX0xhKWT_w(c+M
zsL|FuVv`3&fGvvHUgV2#5J5V^-KAntWea1Mm1WZ!I*%p&P@!b-7P
z&Le0IrbA4Q+!fXiM;F|+;
zaUD-4BRohnVUmQCU%Rp4j8_X*FCo*>xtN$fnIDURyCgZ%0++wYWDMnY69a&m>^gKP
z_2A0Y^72azRco^-{>|k=-;^d*=!%Ik1#ScE$F^-vZyiSaPryMyrL5*SnF1yZ$sG
zfhyPkolq=>nuphlv_#XEQeepn5wxLfov&`i%FIbm$Is3es0Fg(Y=8|v4mCt}B+(TU
zuy$$I<&p9662KMab!9!jO%s34-?^>HD|(&=lZn&POGJt;R$2)MeUB_HB^E=8slF>Q
z&^ejA67LccoH3`_mWLa}oC)(ya?)T%s)SQn4)M--$SMykRQ3Qs%le%x4u1(U5+r-i
zpP>EyWzC0d4O;hyh2h$VL>qE}5RGB)X6A*|(-?x2<*cy6oesYx{Nub&j+U7W(zY6=3TsZ+mU*pI#i&MV
zYzU`_Nx|9(VG&Lt(l-j=1WZCG|3i4i0}6?dH3XzG@lrdsp(rpDW6-B3Jg7BCfEeV)
ztjZZme~&vZajT(P-WWkjg`FuhJJLG=l!_spF{D0Uk8$YWeWpva*
z5m4SCTI2-H^aV~Z6otO^@!u$nLe+Tn@!2Rx>c+*F&z7winmNf#Oap5zIZGVMhkL-|
z1hl_`nQep;OVhk}bWd!}B;OS%fvcK8cqpI%UH(X&o0(h~MPf=SC5YC&L;z-5WIwjha_WRS
zb+v`Q+2CHk{=5lcB;`%o5gajw
zy1q;M5@&0jtP34CDrcIcP6fk8j*fz$zkdtK-}lR0a0Z6K+{RXmZ>v{)(wof=1Dh=Tu_vd^hL2QlH=neW(N3Dbxu2hJyl=N}+ZP8w-wE(<
zu_?5hNlNWzu;+g;Je171UkTN4?(tWeJyeh`Jde0R+)2}Peg3kO+-c{G`8-QcR1%k^
z0>*TahIQ!&^IofOCv=f?Ui8r2S8p-ir@Ogj#}ywC#q5&CT(nLC@Bjy6eRD+vN~}l}
z+&QhfJRJ`YA08HEIRtE1a_XW0HFv+-KRthqUcS@`nnyfZY52>E58}ys0Au&~kZHgQ
zT{mhWw2a|NEXQB1F{ocsp}aSYcXsh`71zLR8?7*qG|g_-j@!xQlrobNU3tR*XiiGr
zUre*Mcle6IToWYh*7E(mT&&9*x3lwoJDwh$RlBJhFVh>_l*8ZE@#%SgMErgy
z_jtH-yT7T5yTKU{FzO5Ab*L=d?_^ZojC-TomR|7AkHv?fi_t(YHFjOUA|IRMpc^
z8i*C2&iv>SscI$I!y89=^Vu=M{cG7u@75xe(~0YKF=VHafG;H^bf=c!Ry`isT)TlI
zVZM96%ir~+h*iP|i*w-RfDq;@^b07u_w^_YUwqF8k!R)Tw3Lf&ofm`Vh7Qu|vYBS_
zLn)y#aJ#9|Yf3A%RRW3{&<2=t-D!I7RKq;%^U$8kEhyfR*phs-umdzs~&!f_bB!zG%*P@xD`gE`{p=$9DbZ&Y`U!B~W}_;I?oJk0K3
z6s6Ym><*IaF}?dnJ{RAP*$!y|5K#!OKnO7eK$$!8G6h^X+DnkB5r|)`>1U0n3OQWr
zXnLbc%IeDQI6#e3ndb^ZpIheZE~i9sqfBV-RN#%$JAg)yJ)$F*WI?@buLPJdIMyCjXrXa
zc}Y*#y^vvsOt`-YVeGm^qktsrnHplo>ui>R
zU3;;21aB$)N-BB}K%!sRZc3{sAJAc8=`imdReza$W2-ongQOtJ%1Zeo&v$w@3oKnJ
z{Em#vBsy!CPuR79GDo~AESQ(l?BkR`T+Fc9O3Q+~Ek^ice>NqCy%&1&AtotMsUOC7
z*I&lF4J9pnG^RweqaVlk*maaJK6;rjlXzg&(7WA^
za0eg1RvJ7H)o_1UL#Sp2LU2;{881+bGm(|5Jdo4apI6_caLP$8^r5e|ndGCo*$yub
zIQ)_`mcF0h-#_7--ziQsFyN0nop5eR%0m>3RNl14FY4(ik1u`DV^j^8>MubCsB|@q
zw|q*dy+zGxv#e|p^0YM&Bg9@8KxgX5HT1-RJ;WQ(Vb%Ou!WaJ#6JjL3oVc4aGQ0R{
zgVI|}e;T=KwE5eH?Tj$0EZdW6zR%80uT$i-b?nIC5I~A|D|Alq%+BMQ$^pSSQID(u
z7|e28)f&rUkY9!@LHk6Oe-O4^JCj#ryb%L7v2lPDJ|O5dvzW*$v&B@r)riH@=tO8*
z8z|%b#Jaj0O?5;kUIjtn~)k3yl
z&}Ws=sV<4?#nHcItrfv?Mab7KH(iik(QN&}{zkVW%$JLh15HyM#s8#7852FPsLmMU+3Q8@;216Sjois<28PdLClL<}`xv4K%t9_YEo&ODuW#r2^3c*qp
zujS%ztca|60%$=NzL=B&TQ8+BBQ?gro(>z5;s;S^jz=&ulh%uUO6??Rn%e!eNs
zLJcYEV78gM70j?*MB^G-=W{=^!5)v)*3C25-h&4knWWwV<(qeREo&8FP?O_&Vx6Z+7M7g0{>#*FQDBw#0p
zm?OkNGhX}^>Yeyf*awMhEU2CRS3E2=GjAityH*$up(sRN+o6i_yA^2*T{CWCfp+X!
zh+rv@Qtmv{kf*a!!U)rs{Y0qG+G6>SbQvA7{;~k2hj>>M4ESwP@3jyITPQ{C!}h)*n_NI
zhNkEoBT-Jedf~*m6np4ACeH7uBd*TO?I+-<=RYb**XB3Vi`GT>epAgOyMoB*;T*iaiwFR-u!L$J~CKVrQ$ey^*bzMyuqJ-3?B6f^4`d|8e4Su>tzgFi<{-liRky#$s+m}n`D48I1@!9O{R>Un%Od&$?ihTOs%{A3D7j6K%r
za_{RQ%!dCc8{=Z3ZmP&G(*lYPJFH3J1ttPM2C%x<@$n+hMz?B}F-ZX93M
zw%CgT3yy-5LCEI@2>U>JFE+SOgBz(I#Q9XspdVEWFqdAT+tg>OBe~MZZALc9wUVyP
zX1>Tj+#bBjkln&Q$Tsu(UDNlW$M{sh6Ae93U3w7Yv(Ge(tRPoB;UMed2JHOik9>5W
z6bm3wbf1&H1gF_a-I!4)s!a{!0o2r*-?+)($N(mWvnQiiuCCbyx5J5{Ukn_2Wr@Ll
zX0ysTPRQJ|>()zweVgwn}40<)(6+e>$wsN~;@Y|Uz^L?7|~O~7>>>Jqn+Y)==`
z(eC)x$?5pb$zLoZZf(}D`WN`WX6FB2;VRO9pP3uk+1S|G{tuOljuUfv9|Qn^?oaCS
zeF2f`Isv6(q7A2m?FmOxIEbs?hc3;Q^?(M-EHY=V`B^hp4en?xIOu;M~kzqSRV${q|E502dL>VE#P(_7Qm`|~8zr0;(0^GJC}_=P!S$g>E4+bNyT
zaaXKc`UDEhkhyi6fL|b5>np0)WBOA?D5c(T2E~LbK!U|UdSF09E!2jvOd#x;BLPPR
z)cRykLl6`CuCQZvnBpu8IOm){3AM**lmVAQ4xVJ1qx6~6>(;H4F$a4qu
zGczAS54g%*`d-*zXV~IHc@@%gSMxaT(V6Yu!S7(+UMk0w$!Ad`(c193DvN
zah~t6&%ST)cW@?6M2HDgCsC@P=z>ytgpnmEQ*^RfQI-CsAeYAzh0K@h*gT1Pq7`Zz
z?+Fx}@@!o0MqVZrRYU=$s*1eSM^m_VeO)G}kG+NO3s9nfoa_Ed6a>&EUVeLn6~43F
zk7I;1hA=JlYn8$RV;2&lDZPYDf$0=jjR6@V2wT@!(J2js0Xx@YfSh3D-RJq@;QlK6
z>D|ur%W1^EM!+5iZ|YT>k-ANl7?ibR(O2u*fNJn0SF(JB=1V`55P|VXSz)lQRkewB|
zS(_O1TaJaRN36(<=&@KXvj!T%bXhmd0)fiZU~Frzt~)mAq>4${e|F?pnYYffPUGg7
zn(a!rhY!E;&++=~)fa-7AiG+66=k5V2%snv!i;LUJCvUl>+LSuMVpFTYoDO*NW54=@W!fmAt2sg%oWQH%0~sfl$O8Wxv3KpouH?Bf%^3
z*__4mp!gsWm7rsP$}3KyX&{g#$`p3W*eJrZHwqriQTM^Tv8t-cux`6)p?h4!RF2kj
zXqBoipTfefR&{#~Heg7bb(-b&0?7mN17OT?}{-Q&1s@jo9@`VD*-T4M#H~W8FUaA2kvH=Jn0Vu{Lk&hSg2HZ-rV$cITZF)$p8fCK9gHC2IDi$&82>!vZC
zXdR3;fv%!53{Ht5mKkdml=P*ua3{5q#NL2`1|6#JU&a^97v2?Z#9_&_yY_+LwxD(m
z2IiOO6VklNzA4|4zffBQiY$X7r0Puh5V+HQ_|g0qn1J`AwoWYp+oYJZ4J
z)v${6Nwj5~R$#KUg^1HKkQFL8Wiune*fLoi0!OR-(IP}H!Jx`4WssR<8#HGL2~F7t
zLm_xNj$?yua^TORa=Q$RIHQU(#OI!*XNg2>sVgCTn#&8f-j@}r
zHUgVQb$z{7z1kgt9Pz?#lK4@rOQo>OEVy*u@-*(c5_?1
zW~=U=HHPe@yMbc;iI1+~THw!aj9l|3yw0P`^7OIN3E*k!Fl{WIQ}VJJ+;EC91#2lz
zNxC8o@Ul9kzZKQx(GHoH6dA&ZPD{E@T^-T+F_F7|sbwjg#uTrppP=E1yHljEbNGo(
z)sPJee(pj{$O<+uS`3}RH83(ePte+w7fSm0s^y`qJH|osmkEOhRQ%tw;#Z=0A#~)B
zx+mxWX~_~vKT$*_(&jH@gHnrxXV=++dNkGahe>eMCPHxyE=f}px9vh`#ZRRF92`PA)v0N#u03pWvJ(8OFz
zFN!^rYwawovf_?)i$13pmT$`xh(hup`@#=Gq&K6IT=?C!ojT-N;LQ`}wW0*n(s_<~}^BLYQ%8xb`tRxpu%;kwtpsYWV-Qp!4DQUg1AU5FTuX
zNa4i}L%;fzZF5=+bUu9H%h~n9r($EU(RKMyE-gROQlF-dn83U*%$Vc?t*Y;;SGrPn
zF_oCAs8F$L84StVrKjf{KODJfhpyl|ql<5S7UdS;*Ce)17GI+-WbtU^Xx~=$0U6-F
z{IwwBBk}knlU_3BmxQs4zd;{qP8$Cg^uG|(KUIbVGSYFRKV3ZzVUyXL1URXLq5+ZmJ;|C?)?NpD=%YJF0!{-I<(_4&jzLSo
zQU58iQ&hYrG;$*;$}{^54*o45*iUmgfM6THYw}!a&TWiOwoW_c6BuiC{t%Q
zj(?k1`cLieg^Rhm`A{by#g5IUWNaQkK!br4&0we63o;&m4$ytBl211enqJUVCCvL#QaqQbWSEgKbp)tF|Q9qxzC&W+J
zkEhG)!NXTupdN3~ydEBJJpR~To?E%UAZ_ryL4I-gd|LjFBd@%V8lEcU;~-hiloeU>
z8}bII=u{?2=U798=c@7Bun23tH
zNL(DJ(V*WA8)DD}nEZ5cF#dyo+joW@?45SL2A$^Ew^U!H_Y$>aigefqO9HZ2Pt-_cTs~%mKgQAbdxIqWN=u
zP{@f?A9-_!YkmubdhI1UNnPW!#9VLdp9apP2^0W5`7~wITOYx?%4Qh{3@8Dt-Gd?4
zu_!Bu|IV4C$&{8tF*8~MwcttyVjBpUPVA5U$WR}vD$=g;-xVyP+gLoy7ocyA=pMhO
z!MLg)9#;DadZ}Z1?MLE9XuFZNg+_@)&RfsWhmur~1!lD@xT0y(Xr_p!`ZID6U5sbR
zDyAaf9t#D5|5i_kRZwx?87(^6L6oza|CL)}5a3v&_L*zLc0c2ba#AjXLw@-ghG4eq
z_bL9l&PlH$CL{=LR&-*#<%s>c>F!-2;h;b2md&$tpTiTF33r5HUN0i#u*uI85A|0c
zQIwmg&b|^@xGLx!N2=M#;4r@k{vZ}t7))>#&drlaifaIOfgvZ##;oT)eAlYOTQFuW
z863zb%!#E`;&4wK-#KhB4+2vKr>i8^bu=*pskv9V89oa97Z7+=5g$*4culF<1k$zm
zC0GWfRP&)!;5qe=Ac7SN#6lBdzdXtV06>Z9_#&BV+*T@o2R$5|#^DI!Q{u8lVP
z;&jj+A-=kKz8e8_G*@*x71v@e9|`^6M3GbXKD2qKo!0zm6X}~I0dS=;vEW)Gkt|fy
zi7dURrXW&G5#r*!#*s|r>&YCaZcvMRk)_pzhN
zm4W10=XtiK_KHU=RJd`u`*K5p6H(BbCs<-9;7g0M@KwS!v2+S4qrlsoo`8SwP}hn4
zmuF2{DVRlwr6_caWB-u#sK&zSPR~!g1_|_M}Rfj%HjYbg4amIqF%<9ef=VG(OL5)TvQc5^T!=(Os
zHIiS}Vunib42Mwgtlpy}Q*I!4UJ`RK0rF&hRz?Z;KlsJNvMVnlxGxHU-fn+BAC&05!K85CCq~hTNxlKx^eXwWOy!EuDBq73DUJEN`h^wdtA8N?A
zyanoBB(A89Pe_!xG2$W?70CM)K%e%V7xvzr=qP>T1kB^65C6jk=a}4de%WFg*qZ!Cj&|x@eHM@jjK`gu5v5`icf72OX1`3{@gc
zjmapPo=SgzDkR%tjg;paOTn2N7iDKjgG#*wJyHdCzXLpSodh4D4b2AzWp9i!)6C=H1Y=6#o<(Rc&H~!8}GRWU&WX@ZFvZlm^$TqSrq8ztZz6OWfw_Zjh!x?
z0xC&em_=P~BP4oonzlFLwcsG?*f7U=xLnK~!R!moSo8ar7Fe98afejtkCszke`O9?
zCHJW16-QPee{zZ@M7D6^!W~I{3NLmfc9aL*&*%3M|CS#yX)cKwV|s(_{z_q6+V(@u
zQ
zv=2{t7(~!<21MDpbAsQ=dna2{8IBoB1wA=?djHt_Z~T>__Rq?sKY9-04Edla8g{;uT>2VGHlk6*<%_7p2<6Sf?imec?Gzqg#FI#uA#_vAvddXRm#n7O+hwWysqe0Xfs@jqXeb&UNdXnzU
zTJ=0S40mXJ+q(Uuo0Gb|armzkM7dIUgH&-IpqBmng~o{AOU!7W!_<_KI6RvH>>wN!
zWwJZp(qwGIP^s3y9tHZOoEr#8NC^&maftM&;W&kc8m58Ah+!@A(AQ}u)D+7`2{yx(!@
zYJ2;`&Ywy^ahOAWUyF2A5-BONcvdR19D2wJDWgd=?!`%n2IlUahZW&0?)s{5hyxSV
zqzElkNTH&~rO8r-7)!!p4;sBd3Ia%@DuH?qxx(J4WZ&HOI7tz%hZr+zp^O)028cow
zNj9d{MJQDy$WT@f-V#qovKXU1vWx-jp;_8A_`T83t?RBvT$kKU6xE46o
zpjr$CUYm=7E}vvY0s-~!{L+1)>Uy=m9hQqCIYGx&j1Ur|OE$*%b03f|QjE`GYqRN&
z+vUwZ5rf>JU#L^5A1j{{DYIy<9rXh?xsNei7MPnN`Kg@50M%%qCxs%-TzM;RT0(9E
zOw5Nd--`Nhy#TLjmF)Moxo>l7&Sl2$(k7Xv9_-sEk|~;GICg_YkC5Y@%*T$Hgp+S1
zb_4a2RPcB|%reBCx*(^FGEtqBAE>grcdGVIQ6e5NmY@qF5hg)>1wlPQq6#*a5Sdr+
zd3ifhQ)nBUAIt6hw|oy&40aE{y3NKOqbriuzJk8?-NWxAM=zj3wyf8}HwR3;
zRFb8`)|_J9th;1o+j66Y0=i(y_nY(WjDg+(MR_DcaP-LgpKf0d>a6a)B=Ze^{rYXR
z7o%!5unqjWmJdE^4(sM>zrED#t#bg0hcobn-U1J|-ygLR7IrIG<{lX5+^xv{CpF?3
z#B*A-I+BojrDwBYZLIMjl$X|Kjgq!}kxHXfRVFhQEMt^Lrs~1vDkd@stuaS5XKm&o
zX5!f!ya+A@d6FScLl~PwhpK_;oZF(zK=x|ngs{pSz7e{SwrphR_n&Pe%j_$JEDKyWerm`Z!}w!jM}rXlIJ_G2Z4|ny
zFtmQowD(ocQa>c~TfF?H&*Ei8gLhV@7#C9~G3bssR&a4^~MU&*<-q^L;<@PgkKPhsc
zY~sbi#$}oo@iP6=Ej4E4g{vX9V>ME=b7bPSXS%YEp&fjI%h_wcc!d?34mQYmQ1=oF
zlszF?Q#jYUcFY@o7{h5|czOM{Xsm!wAUkkk$6&{0m-BLN=%2w8D%Oy&iR07#C@wx0tP;H)ch{ssNtpSB;3ii|gZ
zgoP(qKV^^qm-l^F6Gz5>-~4mbwd@W=QG93XFic7;sX6yL9o`pMvMS3J8Z{erYAPXM
z8T!OW*zkZ2{Cs9J8UQz$Ya5HSqkVdudNcOpx6Ixe_NrVYv^QOB_u{1RgmoDRUlXNc
z3yrY_`D9mEbZRL7rg-~$5X8~v)rS6hC5-c)QS3)EPNP#7oM0t(9^w!ixLef-~Jk&@t@#V(Bg9`&pfr4)|WR*s#z~GC$!T;$4
zXU}&Aea|G@LsqQ6jz!nMy%INvlbk+>QIlgZaNP|h#0v+U;R=5CQZis2Sx%)pMRvYbrJt}(=UJGdbfYErs_#F6b(
ziVu8oVL7%IL&L;NwOAiqFv1Po`Z(~Z@NYmL3RWu-5n&ddy7n21>jur(eo<~
zmC{drXb(525e_AS2km{8N*pJ!zq{ZqV>gpJT>RA1$UTz#18Vqc5^TUvyQ7a67e0?|
z6QIvO9grx{0Ti*qxEJcVQje!Wg{DFH)ni~`8r+uIcw$M_eJ}bX9b1&$JE^VQGcu6Y#(k8|c;>vvu982TA0miFlLBhlMHWT${*{
zA#}>siXHhG5|mC9Q{d1c^z&Ci+M_qLD;vY2lVTsY0EcI_>d9>1koLZ-5;h!Y%0#X?
zBDKSd`@=(r+>L+g*%pL`^ApjWDzHdG)%6O!rA~_BAj@xqEg_N+fMMht`fKy0bbcOx
zZ5Uv*!Fx3^@Pa$QE^%1s=LQf@vZkOt4fZPi_WA+M&;xv|X)C)9qbcvL`=P
z)gU}J?9%DKOlbG-jf~_AS{3F^>L@8ZAJGknb~D;ipKgS*z%v9iJ0cuoH1e<&l2q4L
zSns+AM+OMD+sqFKj(tJ2DX4$in=A*Hh4x7dq~KsT=!)GEH_k^#sPiNUm&;oyc#iYbsPd9Pp3eQ>K
z-(FNcJ`NZi6AS{}E@|_EbB)7=-j8gg_HI0l4Stm>?B(|KnM|@Z-JxbhLA^YJUasMK
z`^3w&b>?mc+OPtJ9~D&ekn+ys28-}M)g%CUgqV=351f!{1jW=t(nnc4QPFwJB&;qc
zl2G&-A7qRcqsYsh81A?$A9t3LkWRJ1)>ApSbR!zL9JZ?qlEOf(&pCUqUeyaU*)7|&
z>4eKJBR++kefVq-aJ+owMqIVg2;Dzq&N1!w8&pf=|S*qHMKD3m+gHGLKQ45;Y
zzBYIWjRBe_%>b(FV~JUwSvWvA!R*}J2nSe2wydIG_Aw7{RFMU-tpwPKzhRZSdC#>q
zd*lSd6RF%n_nbn9(ocK1>JPJMbtow`YVWQ$*9lw@$SGAt!h*`8ROedQSZt_~x*)jb
zTI7T==Y4$i<$X8AgqGeS$aw_VDpQ+qj#VANqeO
zZd=!X$@!<%|J{&P@WHU@!66YqI94REVya$GM0?>ZqJRQKJI}_br7S|3s`lu&n^5>F
zZ#*u!Y5;VPFuuEe6MMwVf;T9YYpuwpXKe52?C9*@8@sPbLWvqUt>1`#A$g%P)ZbPECI*w4)S
zWbIySZ0!z5-58K=77raMlw>hN)Ny0pI+%(pBWX+>nw;Cp#z;|@+-9>{Qb$6MSw%V3-2~1+Ee{)Fs4yO1{
zYDSsKyA2=_uOVJK<|eVy_w&b*e4sr-`rtCbgf{ykc&pJ$MycXRMAn!r^DyP4GmL!g
zhMxZ*4l`RzX3y*a)~hRHOZFi-5th}tyW7TJ;o5UHy*;{52
zEh;Fqh>Gm!1*#aGoK33L5N&LE6<|^R^DE}5fLdq
zA3Kx>pM%oO2n?REKnvmRf)hzgpwR*Kf0qJAfMV~}j}Z`ILK3wi34#%$k$aTx%z4x=
zE)sywnuspmpbb7;C0Hl-LrO?cj(Onn`P{c8r!3IO8F37JJjuLYYVHtIxes_WV=~_+8%kI~T%?g_bxJB#Z
z)%ahOokNr;!M3LBlx^F#ZQHhO+qP}nwr$&XopRNwuKPx}d-QseD_4witsF$`9smBm
z#`50tcMM$bM&&XvI?rIXZ^YP>$_BfTMtnY(h%@r9)stU)#OI5cGy1F2c0s_k<{+B`&olxhJA
z3*);*c-X%;JZ5l5h_X~Gu
zyBUsU$bAp$axC!4axFyaoBOj=7mRi*X)$8zXVezzJ6hF~{@^MF#Ee`5!xiCvqdEG@
zity$b(rMgZQYHDIH7Wq>z=T8<%h6;8_RVl$C!sKNS_UFa)#v^;fS6*4!-oMdnxrHL
zqSxi$0qD_Kq!_A($XQ{5)2*0_C!-&@9Is_`+(=&%ElV?Lb*YF&f;u2RWG~zlFr-su
zv#s|Vs4T4^bFKI5s5sUqPw@^447Aiog&_^z&{is(>sdy6bla7K8Rz+ActWx_Y(s`R
zI@gO`L|)J@k{?1Wrp^-wk{VGUwS+d!3(WR_{o|&4o1w5Pxdup*2aohgV!zsV3Q3u@hTa~+QXBh
zK6v|N9UrnA8$#JP$o!G4e~Rfl=6fJbSgD@gO23q@wvfREpCcl_hjf=VM2A
zK`DYRtcSn*P3_~ZGVw$wCNU-*+A&*QCVP)=yMhL^tn4<=|)-A*q1u2F1As-)-kf
zM(k(}(pfcx^)%Cj`4|du`SZ1UfiMo{9}Vg8h@ef
zn!({d@6tizqVefL5)-3EbDfLr!rkEG$|&jzszu$x-BdaMyL5Y>
z>Tcow)Lqi$dOp!_3*qdxPK!)r&y>vUo5>@>ZSIu8U9RH1R%jeSOWXs<~?}ygM2dM%}EeD~(Q+=u3<@m^VfdGu{Mw
zKG;}EauzKY4)1U_{ws*o0$uFb{KxqNLukNGs4JYu
zrT6EN=lFszZ-=L~Dmk{}v!^(`IXXMqUA?5LxZ|6Zldh<&s4A)U4=9eluaK{s!^fR?
zd3kL5LMzrja`SM>l0R$vEG41vN$F52wB>lRf_S26J2X99lDCImnZoqsjrkMY(hi8^x}zVgp`g{ey>CM3?@cEB6IaYogYJGGqIX3XhS(xU{<4a$Vtrty^l?QG
z#u)F>0q*!BzP#pKTwey+2|LSj-P3u$T#_V+dp@GHJYH}1k>BxdpoIw^Y*1nP$D?!K
z?A++HDGgUfUTwVCgV6^C&+Q;C3Qas`Aait@)D8A~M!00TOXk?7>5(J;s(hq$i&ae1
z>!HWkb84GVVE!>N#awqr2@9bNN136`FbzzvrhD2#n3ZIy7B4K%7O+$(L^q}b2ec&|
zH--)H#To0do3qS0IKYx6`$Z5~+5=a5+_x03)9%qH`b*d;t+f$DT1z$e`UP49-k9**
z9RGT92MI_Jd(vCBa$mxBn-M?uu-K`2dFP=rkwkg5d#R%9W6=EUL_zE)!d
z`WsMU{!G|%17&j*giW!amxTN(@_7m1^WjgogLkf{k1cNB0zcFhRMkO^X>1nTX7CWy
z8?Jb}w@(42~OmKZ|wo}D3vnHtDSkzK7K#QQQeXHKt{^2bhPciR9t
zr7=Bh0Hb#76i0%bE@L#ra~9T==P-lkwti89?vzTx?J{(XAq^4lF*edBhl>22F*VZ6
z{;FCfT4p+@D6BKAq7Js2n}(Z{B(pSA_~b?d)>HZGmfDp0fEk*E`JC5OqyzUB3i@eD
z{n05e*cDx$_HD@hT6RG9tJL!N!M)&L7wpihLNBjCKk9JrlE-Yr)V%0wR^5s2?FYL?
z0Qmg_6k~*SBg_$31Nm~kYP^z*)hIKX^VNV3HVf#^+PosvwR%xdJxhOzssbvYb4}IX
z)Bl=QeyvJTU{z>pbs3I(87AbZ4ysc@*ZGdDPJIUM0Eh=V1W_h=vcpTD<6(EE{M_RRruit8x-a
zG1X+y7&MH3#_}%X!Ma%9CfYRYQ2I4u6S^>+iU`SFVWB1P7aE=Z$<9TE8|Z8L_u6UP
zYb2MA0y>Pmx^7QNT2yF$=
zsMRrs-MJvWx22dA2QZ7S|HNVx*l`?9f|
z6{z*dzHP}jgE!@6l?x?hDjr6!FEzCECL
zGZ7t$E0_RwA9Ft!bu?beqpNb}K#NUxd!IMjb4y*3l55N^l4?uzNa*%g
zCv^C;&o>lC#53^+pWJfL?ihWCr&NSiW{LV7;!+7?p1Op`G(jy_As1DiSJLAgayxoG
zLE$x+UKD}Qw=LA72V}<{=eULmY3;b;g++U<#7e=@_*|XRr(S{XWYd*G_M;Ye;zpXbhQmv=cPt-YJt@4*j8U1kIlwkpj2|w?7^0I=A@Of2$c8H
z=`O9&8|p_y`qdo%jzG3?$ZNbxBbZ`_L2kM6AEK~qHvigVxCh^}BEyV`XOMV6M=Bh6
zPGXT$;}%J#AX8#j=;k{hAvj7Tz`b
z2;C+G@v${KRAj>aQ?>#87oAhWyj7ZT2pE)B1g6>`57QEX-1?t1xe2yUdu8}EcaMYf
zy6f|s{q4ocCvEcDmU~CKt_FWuR2UsAeHS?XKT}n`h(d$pU*P|Ric04smUE4uy1E@x
z9rd^h;SXor&m`SECGdo|=ijf@mz#qph<(n2If2h|jPasufK*h~+)Hsd{3@8ozq5bT
zq@9}YT3vWnr`)iWxyNV7Vc+k;8qGi_)sFAwe4d*d*g_#eBAH%O)4erUHex_v{&KU}VONn|~Jm40eKm3w%g=F_ot)VNy~)bWAuuB$c@$>2M5G&;~Nj?_~gISg+F=3
z12k1B!S{t?E$ag^%u^d8R7Rw+Qx&dzrPr7f0#f;Wx)01W;v`cFjE<%P7W3F_B8)}9
z7!9PfE~Yum%9~lz2SKqdI{|$%!aZu)p|+a!;>DD)WkJ5xblE~`2CR{3%LiA@QKLo5
zWI&BE_&hAmlfHDkj&;BKi2f26o%eVT)*YxShZTk#@IrhOWBO;_#k7FY&rfc~1gC
zWM7(@`8lqj0S+E)&c>g
zyfMV|u)gilsL}R$gOL**sK)DmnUml`CD;>yFCYjBzcM$I`Hc(OF`X!iqI`dqPS4@0
zooJ&?zmspM-)lu^YivhXw3NF2yAL2ApoSj|1`L@z8bA^9OnK7sdmtJD=s+1G$ZN*s
z%z7izjJ#L=;AM^?hL!KY(&Oneh!^d&Ye+Y35c3|vLlgGUMGp{PoY--rI5Q{T=eKQJ
zZf^I!Z!b6Zr?$>Kp<8YDhcBlm=eEvXL3(xi?oWSw@pihe0Yt_M@G~#pC`B?HDPb7l
zVkumb(?c^92@83%81ZV3nu4dvrw6?-otPmBG3heo4^YYt^WUl06D0_rw?i$+FCU8X
z&|lFc_zw~z$B(w5&Xxb3s@-CSdSf`+W!md8F#$(Q(br?7nZhliKw_NNIUD;RN47XJ?E4H6QCL7JA7IHEiGy+AYQG&dqbOp1WGDyHrszv*l;H!q#t{-!do(m}N7@JkPH5YQ@Dm#cfG6Et5jwmvRL7
z6joQTx91w-zyI5_HrCNLd{u3z-LW~e(gQ8t8TCg3?6O~hdii9DoM|9x2!&7%?EP+P
z8>D9OVZ`v$gJUCD?nJiS@p(vOK;^r4nrMNNsX(2&qZTmis&v8=k1i=BQof_4Q_a${
z9*z9eT2th*!$$ryKFT;+{%Uv&V)A7@7~InTgaFFsgz{!iwc}!I6ibHZLWPkG??rT2
zD-@){wMnWth@swsHix<;ZSw{uZp+bGl%OS{Q9|9U#m4?NK(juFek)pG2wgc*hf_=s
zEq602U6yYRvqaPYUXex8XUhi1|501npq_BgcCGx`@96b?eVkpPLXMf`p9xOGo@Ke=
z5&nDR{hXuMmrFXhRaUy8430RpW_8cR=F%7p>U==%bE)uor#unoNy4hFTeT77w+hjg
zs>{*jP9Z|>zf|_u)eQ=uf@r&d#AaB{WDWGLvk$?3qXG^4YJ6(3%)C4qlWn^e`$7;Q
zf<%*I;=v7&dSBoZnJC!$-m2p>w0qDg3u%<=JVVG=nmJ^Z
zJ(@coB~L#IFl9Hz+X`_jVVqU66KxJl2<<%9FzNa~v@3qM4Afz-577Jk+cbSFNjxfm2
z;|w-S4K!N7uC*(a+C{DPoYSQdaK8puyZ9p2;Jd;35P^fJxzTf#_o%YSLSkP($v+z46=(Yuc6n+NQa9HXf-`^#RD{!apsS&zZ}zmgy+T37!wmwMX=h~#5|Bti<#>l4>e
zEOW-4Cc)|AnKLURDkRZ7ol0nu7nHtC#TcW
zhb4|4{FmkD_w@KYR(t(q=iA-hee5^>X{p8P
z(?L{bs^)6kPBoEb)Nt}so