OTA - redirection support + bootloader

2026-01-28 21:30:54 +03:00 · 2019-09-26 19:51:16 -04:00
parent 4f0fedb126
commit ae1a480029
51 changed files with 38686 additions and 45 deletions
--- a/components/bootloader/subproject/.gitignore
+++ b/components/bootloader/subproject/.gitignore
@@ -0,0 +1,2 @@
+build
+sdkconfig
--- a/components/bootloader/subproject/CMakeLists.txt
+++ b/components/bootloader/subproject/CMakeLists.txt
@@ -0,0 +1,128 @@
+cmake_minimum_required(VERSION 3.5)
+# This subproject is configured by a parent build, which must pass SDKCONFIG, IDF_PATH and IDF_TARGET.
+if(NOT SDKCONFIG)
+    message(FATAL_ERROR "Bootloader subproject expects the SDKCONFIG variable to be passed "
+        "in by the parent build process.")
+endif()
+
+if(NOT IDF_PATH)
+    message(FATAL_ERROR "Bootloader subproject expects the IDF_PATH variable to be passed "
+        "in by the parent build process.")
+endif()
+
+if(NOT IDF_TARGET)
+    message(FATAL_ERROR "Bootloader subproject expects the IDF_TARGET variable to be passed "
+        "in by the parent build process.")
+endif()
+# COMPONENTS and BOOTLOADER_BUILD are set before project.cmake is included (presumably read there - confirm).
+set(COMPONENTS bootloader esptool_py partition_table soc bootloader_support log spi_flash micro-ecc main efuse)
+set(BOOTLOADER_BUILD 1)
+include("${IDF_PATH}/tools/cmake/project.cmake")
+set(common_req log esp_rom esp_common xtensa)  # stored in __COMPONENT_REQUIRES_COMMON below
+if(LEGACY_INCLUDE_COMMON_HEADERS)
+    list(APPEND common_req soc)
+endif()
+idf_build_set_property(__COMPONENT_REQUIRES_COMMON "${common_req}")
+idf_build_set_property(__OUTPUT_SDKCONFIG 0)  # NOTE(review): presumably stops this build writing sdkconfig - confirm
+project(bootloader)
+# Append to the build's compile definitions and options: BOOTLOADER_BUILD=1 and -fno-stack-protector.
+idf_build_set_property(COMPILE_DEFINITIONS "-DBOOTLOADER_BUILD=1" APPEND)
+idf_build_set_property(COMPILE_OPTIONS "-fno-stack-protector" APPEND)
+# Space-separated copies of the tool command lists, used only in the help text printed further down.
+string(REPLACE ";" " " espsecurepy "${ESPSECUREPY}")
+string(REPLACE ";" " " espefusepy "${ESPEFUSEPY}")
+set(esptoolpy_write_flash "${ESPTOOLPY_WRITE_FLASH_STR}")
+# Reflashable secure bootloader: set up the secure boot key file and a digest image built from bootloader.bin.
+if(CONFIG_SECURE_BOOTLOADER_REFLASHABLE)
+    if(CONFIG_SECURE_BOOTLOADER_KEY_ENCODING_192BIT)
+        set(key_digest_len 192)
+    else()
+        set(key_digest_len 256)
+    endif()
+    # Both artifacts live in the build directory; keep their absolute paths.
+    get_filename_component(bootloader_digest_bin
+        "bootloader-reflash-digest.bin"
+        ABSOLUTE BASE_DIR "${CMAKE_BINARY_DIR}")
+
+    get_filename_component(secure_bootloader_key
+        "secure-bootloader-key-${key_digest_len}.bin"
+        ABSOLUTE BASE_DIR "${CMAKE_BINARY_DIR}")
+    # Rule that creates the key file by running "digest_private_key" on SECURE_BOOT_SIGNING_KEY.
+    add_custom_command(OUTPUT "${secure_bootloader_key}"
+        COMMAND ${ESPSECUREPY} digest_private_key
+            --keylen "${key_digest_len}"
+            --keyfile "${SECURE_BOOT_SIGNING_KEY}"
+            "${secure_bootloader_key}"
+        VERBATIM)
+    # Signed-binary builds generate the key as part of ALL; otherwise the key file must exist at configure time.
+    if(CONFIG_SECURE_BOOT_BUILD_SIGNED_BINARIES)
+        add_custom_target(gen_secure_bootloader_key ALL DEPENDS "${secure_bootloader_key}")
+    else()
+        if(NOT EXISTS "${secure_bootloader_key}")
+            message(FATAL_ERROR
+                "No pre-generated key for a reflashable secure bootloader is available, "
+                "due to signing configuration."
+                "\nTo generate one, you can use this command:"
+                "\n\t${espsecurepy} generate_flash_encryption_key ${secure_bootloader_key}"
+                "\nIf a signing key is present, then instead use:"
+                "\n\t${espsecurepy} digest_private_key "
+                "--keylen (192/256) --keyfile KEYFILE "
+                "${secure_bootloader_key}")
+        endif()
+        add_custom_target(gen_secure_bootloader_key)  # empty target so the DEPENDS below still resolves
+    endif()
+    # Rule that writes the digest image from bootloader.bin using the key file ("digest_secure_bootloader").
+    add_custom_command(OUTPUT "${bootloader_digest_bin}"
+        COMMAND ${CMAKE_COMMAND} -E echo "DIGEST ${bootloader_digest_bin}"
+        COMMAND ${ESPSECUREPY} digest_secure_bootloader --keyfile "${secure_bootloader_key}"
+            -o "${bootloader_digest_bin}" "${CMAKE_BINARY_DIR}/bootloader.bin"
+        DEPENDS gen_secure_bootloader_key gen_project_binary
+        VERBATIM)
+    # The digest image is part of the default (ALL) build.
+    add_custom_target (gen_bootloader_digest_bin ALL DEPENDS "${bootloader_digest_bin}")
+endif()
+# After bootloader.elf is built, print flashing instructions for the configured secure boot mode.
+if(CONFIG_SECURE_BOOTLOADER_ONE_TIME_FLASH)  # one-time flash: print the single write_flash command
+    add_custom_command(TARGET bootloader.elf POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "=============================================================================="
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "Bootloader built. Secure boot enabled, so bootloader not flashed automatically."
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "One-time flash command is:"
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "\t${esptoolpy_write_flash} ${BOOTLOADER_OFFSET} ${CMAKE_BINARY_DIR}/bootloader.bin"
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "* IMPORTANT: After first boot, BOOTLOADER CANNOT BE RE-FLASHED on same device"
+        VERBATIM)
+elseif(CONFIG_SECURE_BOOTLOADER_REFLASHABLE)  # reflashable: print key-burn, first-flash and re-flash commands
+    add_custom_command(TARGET bootloader.elf POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "=============================================================================="
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "Bootloader built and secure digest generated."
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "Secure boot enabled, so bootloader not flashed automatically."
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "Burn secure boot key to efuse using:"
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "\t${espefusepy} burn_key secure_boot ${secure_bootloader_key}"
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "First time flash command is:"
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "\t${esptoolpy_write_flash} ${BOOTLOADER_OFFSET} ${CMAKE_BINARY_DIR}/bootloader.bin"
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "=============================================================================="
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "To reflash the bootloader after initial flash:"
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "\t${esptoolpy_write_flash} 0x0 ${bootloader_digest_bin}"
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "=============================================================================="
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "* After first boot, only re-flashes of this kind (with same key) will be accepted."
+        COMMAND ${CMAKE_COMMAND} -E echo
+            "* Not recommended to re-use the same secure boot keyfile on multiple production devices."
+        # No DEPENDS here: the TARGET form of add_custom_command() does not accept that keyword (CMP0175).
+        VERBATIM)
+endif()
--- a/components/bootloader/subproject/Makefile
+++ b/components/bootloader/subproject/Makefile
@@ -0,0 +1,35 @@
+#
+# This is a project Makefile. It is assumed the directory this Makefile resides in is a
+# project subdirectory.
+# Guard: abort if this Makefile is the top-level make invocation (MAKELEVEL is "0").
+ifeq ("$(MAKELEVEL)","0")
+$(error Bootloader makefile expects to be run as part of 'make bootloader' from a top-level project.)
+endif
+
+PROJECT_NAME := bootloader
+
+COMPONENTS := esptool_py bootloader_support log spi_flash micro-ecc soc main efuse
+
+# Clear the CFLAGS and CXXFLAGS inherited from the top-level project
+CFLAGS =
+CXXFLAGS =
+
+# The $(IDF_TARGET), esp_rom and esp_common components cannot be included directly, but their headers are needed; the xtensa include directories are added the same way.
+CFLAGS += -I $(IDF_PATH)/components/$(IDF_TARGET)/include
+CFLAGS += -I $(IDF_PATH)/components/esp_rom/include
+CFLAGS += -I $(IDF_PATH)/components/esp_common/include
+CFLAGS += -I $(IDF_PATH)/components/xtensa/include -I $(IDF_PATH)/components/xtensa/$(IDF_TARGET)/include
+
+# The bootloader pseudo-component is also included in this build, so that its Kconfig.projbuild is included.
+#
+# IS_BOOTLOADER_BUILD tells the component Makefile.projbuild to be a no-op
+IS_BOOTLOADER_BUILD := 1
+export IS_BOOTLOADER_BUILD
+
+# The BOOTLOADER_BUILD macro serves the same purpose in source files, which can test it at compile time
+CFLAGS += -D BOOTLOADER_BUILD=1
+
+# Add the top-level "project" include directory, for sdkconfig.h
+CFLAGS += -I$(BUILD_DIR_BASE)/../include
+
+include $(IDF_PATH)/make/project.mk
--- a/components/bootloader/subproject/components/micro-ecc/CMakeLists.txt
+++ b/components/bootloader/subproject/components/micro-ecc/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Register the component with one source file, "micro-ecc/uECC.c"; headers come from the same directory.
+idf_component_register(
+    SRCS "micro-ecc/uECC.c" INCLUDE_DIRS "micro-ecc")
--- a/components/bootloader/subproject/components/micro-ecc/component.mk
+++ b/components/bootloader/subproject/components/micro-ecc/component.mk
@@ -0,0 +1,8 @@
+# Only compile the micro-ecc/uECC.c source file
+# (COMPONENT_SRCDIRS is still needed so the build system can find that source file).
+COMPONENT_SRCDIRS := micro-ecc
+COMPONENT_OBJS := micro-ecc/uECC.o
+# Add the micro-ecc directory to the include path.
+COMPONENT_ADD_INCLUDEDIRS := micro-ecc
+# NOTE(review): presumably declares micro-ecc as a git submodule for the build system to check - confirm.
+COMPONENT_SUBMODULES := micro-ecc
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/.gitignore
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/.gitignore
@@ -0,0 +1,8 @@
+__build__/
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+*.pyz
+*.egg-info/
+.DS_Store
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/LICENSE.txt
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/LICENSE.txt
@@ -0,0 +1,21 @@
+Copyright (c) 2014, Kenneth MacKay
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/README.md
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/README.md
@@ -0,0 +1,41 @@
+micro-ecc
+==========
+
+A small and fast ECDH and ECDSA implementation for 8-bit, 32-bit, and 64-bit processors.
+
+The static version of micro-ecc (i.e., where the curve is selected at compile time) can be found in the "static" branch.
+
+Features
+--------
+
+ * Resistant to known side-channel attacks.
+ * Written in C, with optional GCC inline assembly for AVR, ARM and Thumb platforms.
+ * Supports 8, 32, and 64-bit architectures.
+ * Small code size.
+ * No dynamic memory allocation.
+ * Support for 5 standard curves: secp160r1, secp192r1, secp224r1, secp256r1, and secp256k1.
+ * BSD 2-clause license.
+
+Usage Notes
+-----------
+### Point Representation ###
+Compressed points are represented in the standard format as defined in http://www.secg.org/collateral/sec1_final.pdf; uncompressed points are represented in standard format, but without the `0x04` prefix. All functions except `uECC_compress()` only accept uncompressed points; use `uECC_compress()` and `uECC_decompress()` to convert between compressed and uncompressed point representations.
+
+Private keys are represented in the standard format.
+
+### Using the Code ###
+
+I recommend just copying (or symlinking) the uECC files into your project. Then just `#include "uECC.h"` to use the micro-ecc functions.
+
+For use with Arduino, you can use the Library Manager to download micro-ecc (**Sketch**=>**Include Library**=>**Manage Libraries**). You can then use uECC just like any other Arduino library (uECC should show up in the **Sketch**=>**Import Library** submenu).
+
+See uECC.h for documentation for each function.
+
+### Compilation Notes ###
+
+ * Should compile with any C/C++ compiler that supports stdint.h (this includes Visual Studio 2013).
+ * If you want to change the defaults for any of the uECC compile-time options (such as `uECC_OPTIMIZATION_LEVEL`), you must change them in your Makefile or similar so that uECC.c is compiled with the desired values (ie, compile uECC.c with `-DuECC_OPTIMIZATION_LEVEL=3` or whatever).
+ * When compiling for a Thumb-1 platform, you must use the `-fomit-frame-pointer` GCC option (this is enabled by default when compiling with `-O1` or higher).
+ * When compiling for an ARM/Thumb-2 platform with `uECC_OPTIMIZATION_LEVEL` >= 3, you must use the `-fomit-frame-pointer` GCC option (this is enabled by default when compiling with `-O1` or higher).
+ * When compiling for AVR, you must have optimizations enabled (compile with `-O1` or higher).
+ * When building for Windows, you will need to link in the `advapi32.lib` system library.
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm.inc
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm.inc
@@ -0,0 +1,820 @@
+/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#ifndef _UECC_ASM_ARM_H_
+#define _UECC_ASM_ARM_H_
+/* uECC_MIN_WORDS: word count of the smallest enabled curve (each later #if overrides with a smaller value). */
+#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
+    #define uECC_MIN_WORDS 8
+#endif
+#if uECC_SUPPORTS_secp224r1
+    #undef uECC_MIN_WORDS
+    #define uECC_MIN_WORDS 7
+#endif
+#if uECC_SUPPORTS_secp192r1
+    #undef uECC_MIN_WORDS
+    #define uECC_MIN_WORDS 6
+#endif
+#if uECC_SUPPORTS_secp160r1
+    #undef uECC_MIN_WORDS
+    #define uECC_MIN_WORDS 5
+#endif
+/* Inline-asm operand constraints: Thumb gets "l" instead of "r" (presumably low registers only - confirm). */
+#if (uECC_PLATFORM == uECC_arm_thumb)
+    #define REG_RW "+l"
+    #define REG_WRITE "=l"
+#else
+    #define REG_RW "+r"
+    #define REG_WRITE "=r"
+#endif
+/* The _LO variants select "l" on Thumb-2 as well as on Thumb. */
+#if (uECC_PLATFORM == uECC_arm_thumb || uECC_PLATFORM == uECC_arm_thumb2)
+    #define REG_RW_LO "+l"
+    #define REG_WRITE_LO "=l"
+#else
+    #define REG_RW_LO "+r"
+    #define REG_WRITE_LO "=r"
+#endif
+/* Placed at the end of the asm blocks in this file: empty on Thumb-2, ".syntax divided" otherwise. */
+#if (uECC_PLATFORM == uECC_arm_thumb2)
+    #define RESUME_SYNTAX
+#else
+    #define RESUME_SYNTAX ".syntax divided \n\t"
+#endif
+
+#if (uECC_OPTIMIZATION_LEVEL >= 2)
+/* Unrolled add: result = left + right over num_words words; returns the carry out of the last word. */
+uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+#if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+  #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
+    uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; /* skip offset: 4 insns per word; presumably 2 bytes each, +1 = Thumb bit - confirm */
+  #else /* ARM */
+    uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; /* skip offset: 4 insns per word; presumably 4 bytes each - confirm */
+  #endif
+#endif
+    uint32_t carry;
+    uint32_t left_word;
+    uint32_t right_word;
+    /* The first word uses a plain "adds"; the remaining words use "adcs" inside the REPEAT body. */
+    __asm__ volatile (
+        ".syntax unified \n\t"
+        "movs %[carry], #0 \n\t" /* carry = 0 */
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "adr %[left], 1f \n\t" /* address of label 1 (left is a scratch register here) */
+        ".align 4 \n\t"
+        "adds %[jump], %[left] \n\t" /* jump = label 1 + skip offset */
+    #endif
+        /* First word: load both operands, add, store; each access post-increments its pointer. */
+        "ldmia %[lptr]!, {%[left]} \n\t"
+        "ldmia %[rptr]!, {%[right]} \n\t"
+        "adds %[left], %[right] \n\t"
+        "stmia %[dptr]!, {%[left]} \n\t"
+        /* When word counts can differ, enter the unrolled code past the steps that are not needed. */
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "bx %[jump] \n\t"
+    #endif
+        "1: \n\t"
+        REPEAT(DEC(uECC_MAX_WORDS),
+            "ldmia %[lptr]!, {%[left]} \n\t"
+            "ldmia %[rptr]!, {%[right]} \n\t"
+            "adcs %[left], %[right] \n\t"
+            "stmia %[dptr]!, {%[left]} \n\t")
+        /* carry was zeroed above, so carry + carry + C leaves just the final carry flag. */
+        "adcs %[carry], %[carry] \n\t"
+        RESUME_SYNTAX
+        : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+          [jump] REG_RW_LO (jump),
+    #endif
+          [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word),
+          [right] REG_WRITE_LO (right_word)
+        :
+        : "cc", "memory"
+    );
+    return carry;
+}
+#define asm_add 1 /* suppresses the "small" uECC_vli_add further down (guarded by #if !asm_add) */
+/* Unrolled subtract: result = left - right over num_words words; returns 1 if a borrow occurred, else 0. */
+uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+#if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+  #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
+    uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; /* skip offset: 4 insns per word; presumably 2 bytes each, +1 = Thumb bit - confirm */
+  #else /* ARM */
+    uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; /* skip offset: 4 insns per word; presumably 4 bytes each - confirm */
+  #endif
+#endif
+    uint32_t carry;
+    uint32_t left_word;
+    uint32_t right_word;
+    /* The first word uses a plain "subs"; the remaining words use "sbcs" inside the REPEAT body. */
+    __asm__ volatile (
+        ".syntax unified \n\t"
+        "movs %[carry], #0 \n\t" /* carry = 0 */
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "adr %[left], 1f \n\t" /* address of label 1 (left is a scratch register here) */
+        ".align 4 \n\t"
+        "adds %[jump], %[left] \n\t" /* jump = label 1 + skip offset */
+    #endif
+        /* First word: load both operands, subtract, store; each access post-increments its pointer. */
+        "ldmia %[lptr]!, {%[left]} \n\t"
+        "ldmia %[rptr]!, {%[right]} \n\t"
+        "subs %[left], %[right] \n\t"
+        "stmia %[dptr]!, {%[left]} \n\t"
+        /* When word counts can differ, enter the unrolled code past the steps that are not needed. */
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+        "bx %[jump] \n\t"
+    #endif
+        "1: \n\t"
+        REPEAT(DEC(uECC_MAX_WORDS),
+            "ldmia %[lptr]!, {%[left]} \n\t"
+            "ldmia %[rptr]!, {%[right]} \n\t"
+            "sbcs %[left], %[right] \n\t"
+            "stmia %[dptr]!, {%[left]} \n\t")
+        /* carry was zeroed above, so carry + carry + C captures the final carry flag; it is inverted on return. */
+        "adcs %[carry], %[carry] \n\t"
+        RESUME_SYNTAX
+        : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
+    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
+          [jump] REG_RW_LO (jump),
+    #endif
+          [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word),
+          [right] REG_WRITE_LO (right_word)
+        :
+        : "cc", "memory"
+    );
+    return !carry; /* Note that on ARM, carry flag set means "no borrow" when subtracting
+                      (for some reason...) */
+}
+#define asm_sub 1 /* suppresses the "small" uECC_vli_sub further down (guarded by #if !asm_sub) */
+
+#endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */
+
+#if (uECC_OPTIMIZATION_LEVEL >= 3)
+
+#if (uECC_PLATFORM != uECC_arm_thumb)
+
+#if uECC_ARM_USE_UMAAL
+    #include "asm_arm_mult_square_umaal.inc"
+#else
+    #include "asm_arm_mult_square.inc"
+#endif
+
+#if (uECC_OPTIMIZATION_LEVEL == 3)
+/* Level-3 multiply built from the FAST_MULT_ASM_* snippets; the four arguments are pinned to r0-r3. */
+uECC_VLI_API void uECC_vli_mult(uint32_t *result,
+                                const uint32_t *left,
+                                const uint32_t *right,
+                                wordcount_t num_words) {
+    register uint32_t *r0 __asm__("r0") = result;
+    register const uint32_t *r1 __asm__("r1") = left;
+    register const uint32_t *r2 __asm__("r2") = right;
+    register uint32_t r3 __asm__("r3") = num_words;
+    /* Emit the snippet for the smallest enabled size, then the *_TO_* steps up to uECC_MAX_WORDS. */
+    __asm__ volatile (
+        ".syntax unified \n\t"
+#if (uECC_MIN_WORDS == 5)
+        FAST_MULT_ASM_5
+    #if (uECC_MAX_WORDS > 5)
+        FAST_MULT_ASM_5_TO_6
+    #endif
+    #if (uECC_MAX_WORDS > 6)
+        FAST_MULT_ASM_6_TO_7
+    #endif
+    #if (uECC_MAX_WORDS > 7)
+        FAST_MULT_ASM_7_TO_8
+    #endif
+#elif (uECC_MIN_WORDS == 6)
+        FAST_MULT_ASM_6
+    #if (uECC_MAX_WORDS > 6)
+        FAST_MULT_ASM_6_TO_7
+    #endif
+    #if (uECC_MAX_WORDS > 7)
+        FAST_MULT_ASM_7_TO_8
+    #endif
+#elif (uECC_MIN_WORDS == 7)
+        FAST_MULT_ASM_7
+    #if (uECC_MAX_WORDS > 7)
+        FAST_MULT_ASM_7_TO_8
+    #endif
+#elif (uECC_MIN_WORDS == 8)
+        FAST_MULT_ASM_8
+#endif
+        "1: \n\t" /* NOTE(review): presumably the exit label the snippets branch to once num_words is reached - confirm */
+        RESUME_SYNTAX
+        : "+r" (r0), "+r" (r1), "+r" (r2)
+        : "r" (r3)
+        : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+    );
+}
+#define asm_mult 1 /* suppresses the generic uECC_vli_mult further down (guarded by #if !asm_mult) */
+/* Level-3 square built from the FAST_SQUARE_ASM_* snippets; compiled only when uECC_SQUARE_FUNC is set. */
+#if uECC_SQUARE_FUNC
+uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
+                                  const uECC_word_t *left,
+                                  wordcount_t num_words) {
+    register uint32_t *r0 __asm__("r0") = result;
+    register const uint32_t *r1 __asm__("r1") = left;
+    register uint32_t r2 __asm__("r2") = num_words;
+    /* Emit the snippet for the smallest enabled size, then the *_TO_* steps up to uECC_MAX_WORDS. */
+    __asm__ volatile (
+        ".syntax unified \n\t"
+#if (uECC_MIN_WORDS == 5)
+        FAST_SQUARE_ASM_5
+    #if (uECC_MAX_WORDS > 5)
+        FAST_SQUARE_ASM_5_TO_6
+    #endif
+    #if (uECC_MAX_WORDS > 6)
+        FAST_SQUARE_ASM_6_TO_7
+    #endif
+    #if (uECC_MAX_WORDS > 7)
+        FAST_SQUARE_ASM_7_TO_8
+    #endif
+#elif (uECC_MIN_WORDS == 6)
+        FAST_SQUARE_ASM_6
+    #if (uECC_MAX_WORDS > 6)
+        FAST_SQUARE_ASM_6_TO_7
+    #endif
+    #if (uECC_MAX_WORDS > 7)
+        FAST_SQUARE_ASM_7_TO_8
+    #endif
+#elif (uECC_MIN_WORDS == 7)
+        FAST_SQUARE_ASM_7
+    #if (uECC_MAX_WORDS > 7)
+        FAST_SQUARE_ASM_7_TO_8
+    #endif
+#elif (uECC_MIN_WORDS == 8)
+        FAST_SQUARE_ASM_8
+#endif
+        /* NOTE(review): label 1 is presumably the exit the snippets branch to once num_words is reached - confirm */
+        "1: \n\t"
+        RESUME_SYNTAX
+        : "+r" (r0), "+r" (r1)
+        : "r" (r2)
+        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+    );
+}
+#define asm_square 1
+#endif /* uECC_SQUARE_FUNC */
+
+#else /* (uECC_OPTIMIZATION_LEVEL > 3) */
+/* Multiply for optimization levels above 3: picks one fixed-size FAST_MULT_ASM_n snippet by num_words. */
+uECC_VLI_API void uECC_vli_mult(uint32_t *result,
+                                const uint32_t *left,
+                                const uint32_t *right,
+                                wordcount_t num_words) {
+    register uint32_t *r0 __asm__("r0") = result;
+    register const uint32_t *r1 __asm__("r1") = left;
+    register const uint32_t *r2 __asm__("r2") = right;
+    register uint32_t r3 __asm__("r3") = num_words;
+    /* Only sizes of enabled curves are handled; any other num_words falls through without writing result. */
+#if uECC_SUPPORTS_secp160r1
+    if (num_words == 5) {
+        __asm__ volatile (
+            ".syntax unified \n\t"
+            FAST_MULT_ASM_5
+            RESUME_SYNTAX
+            : "+r" (r0), "+r" (r1), "+r" (r2)
+            : "r" (r3)
+            : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+        );
+        return;
+    }
+#endif
+#if uECC_SUPPORTS_secp192r1
+    if (num_words == 6) {
+        __asm__ volatile (
+            ".syntax unified \n\t"
+            FAST_MULT_ASM_6
+            RESUME_SYNTAX
+            : "+r" (r0), "+r" (r1), "+r" (r2)
+            : "r" (r3)
+            : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+        );
+        return;
+    }
+#endif
+#if uECC_SUPPORTS_secp224r1
+    if (num_words == 7) {
+        __asm__ volatile (
+            ".syntax unified \n\t"
+            FAST_MULT_ASM_7
+            RESUME_SYNTAX
+            : "+r" (r0), "+r" (r1), "+r" (r2)
+            : "r" (r3)
+            : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+        );
+        return;
+    }
+#endif
+#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
+    if (num_words == 8) {
+        __asm__ volatile (
+            ".syntax unified \n\t"
+            FAST_MULT_ASM_8
+            RESUME_SYNTAX
+            : "+r" (r0), "+r" (r1), "+r" (r2)
+            : "r" (r3)
+            : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+        );
+        return;
+    }
+#endif
+}
+#define asm_mult 1 /* suppresses the generic uECC_vli_mult further down (guarded by #if !asm_mult) */
+/* Square for optimization levels above 3: picks one fixed-size FAST_SQUARE_ASM_n snippet by num_words. */
+#if uECC_SQUARE_FUNC
+uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
+                                  const uECC_word_t *left,
+                                  wordcount_t num_words) {
+    register uint32_t *r0 __asm__("r0") = result;
+    register const uint32_t *r1 __asm__("r1") = left;
+    register uint32_t r2 __asm__("r2") = num_words;
+    /* Only sizes of enabled curves are handled; any other num_words falls through without writing result. */
+#if uECC_SUPPORTS_secp160r1
+    if (num_words == 5) {
+        __asm__ volatile (
+            ".syntax unified \n\t"
+            FAST_SQUARE_ASM_5
+            RESUME_SYNTAX
+            : "+r" (r0), "+r" (r1)
+            : "r" (r2)
+            : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+        );
+        return;
+    }
+#endif
+#if uECC_SUPPORTS_secp192r1
+    if (num_words == 6) {
+        __asm__ volatile (
+            ".syntax unified \n\t"
+            FAST_SQUARE_ASM_6
+            RESUME_SYNTAX
+            : "+r" (r0), "+r" (r1)
+            : "r" (r2)
+            : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+        );
+        return;
+    }
+#endif
+#if uECC_SUPPORTS_secp224r1
+    if (num_words == 7) {
+        __asm__ volatile (
+            ".syntax unified \n\t"
+            FAST_SQUARE_ASM_7
+            RESUME_SYNTAX
+            : "+r" (r0), "+r" (r1)
+            : "r" (r2)
+            : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+        );
+        return;
+    }
+#endif
+#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
+    if (num_words == 8) {
+        __asm__ volatile (
+            ".syntax unified \n\t"
+            FAST_SQUARE_ASM_8
+            RESUME_SYNTAX
+            : "+r" (r0), "+r" (r1)
+            : "r" (r2)
+            : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+        );
+        return;
+    }
+#endif
+}
+#define asm_square 1
+#endif /* uECC_SQUARE_FUNC */
+
+#endif /* (uECC_OPTIMIZATION_LEVEL > 3) */
+
+#endif /* uECC_PLATFORM != uECC_arm_thumb */
+
+#endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */
+
+/* ---- "Small" implementations ---- */
+/* Looping add, compiled only when no uECC_vli_add was defined above; returns the final carry (0 or 1). */
+#if !asm_add
+uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+    uint32_t carry = 0;
+    uint32_t left_word;
+    uint32_t right_word;
+    /* The carry is kept in a register between iterations and moved back into the flag at the top of each pass. */
+    __asm__ volatile (
+        ".syntax unified \n\t"
+        "1: \n\t"
+        "ldmia %[lptr]!, {%[left]} \n\t"  /* Load left word. */
+        "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
+        "lsrs %[carry], #1 \n\t"          /* Set up carry flag (carry = 0 after this). */
+        "adcs %[left], %[left], %[right] \n\t"   /* Add with carry. */
+        "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */
+        "stmia %[dptr]!, {%[left]} \n\t"  /* Store result word. */
+        "subs %[ctr], #1 \n\t"            /* Decrement counter (num_words is decremented before the test, so it must be nonzero). */
+        "bne 1b \n\t"                     /* Loop until counter == 0. */
+        RESUME_SYNTAX
+        : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right),
+          [ctr] REG_RW (num_words), [carry] REG_RW (carry),
+          [left] REG_WRITE (left_word), [right] REG_WRITE (right_word)
+        :
+        : "cc", "memory"
+    );
+    return carry;
+}
+#define asm_add 1
+#endif
+/* Looping subtract, compiled only when no uECC_vli_sub was defined above; returns 1 on borrow, else 0. */
+#if !asm_sub
+uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+    uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */
+    uint32_t left_word;
+    uint32_t right_word;
+    /* The carry is kept in a register between iterations and moved back into the flag at the top of each pass. */
+    __asm__ volatile (
+        ".syntax unified \n\t"
+        "1: \n\t"
+        "ldmia %[lptr]!, {%[left]} \n\t"  /* Load left word. */
+        "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
+        "lsrs %[carry], #1 \n\t"          /* Set up carry flag (carry = 0 after this). */
+        "sbcs %[left], %[left], %[right] \n\t"   /* Subtract with borrow. */
+        "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */
+        "stmia %[dptr]!, {%[left]} \n\t"  /* Store result word. */
+        "subs %[ctr], #1 \n\t"            /* Decrement counter (num_words is decremented before the test, so it must be nonzero). */
+        "bne 1b \n\t"                     /* Loop until counter == 0. */
+        RESUME_SYNTAX
+        : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right),
+          [ctr] REG_RW (num_words), [carry] REG_RW (carry),
+          [left] REG_WRITE (left_word), [right] REG_WRITE (right_word)
+        :
+        : "cc", "memory"
+    );
+    return !carry; /* carry set means no borrow (see the initial value above), so invert it */
+}
+#define asm_sub 1
+#endif
+
+#if !asm_mult
+uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
+                                const uECC_word_t *left,
+                                const uECC_word_t *right,
+                                wordcount_t num_words) {
+#if (uECC_PLATFORM != uECC_arm_thumb)
+    uint32_t c0 = 0;
+    uint32_t c1 = 0;
+    uint32_t c2 = 0;
+    uint32_t k = 0;
+    uint32_t i;
+    uint32_t t0, t1;
+    
+    __asm__ volatile (
+        ".syntax unified \n\t"
+        
+        "1: \n\t" /* outer loop (k < num_words) */
+        "movs %[i], #0 \n\t" /* i = 0 */
+        "b 3f \n\t"
+        
+        "2: \n\t" /* outer loop (k >= num_words) */
+        "movs %[i], %[k] \n\t"         /* i = k */
+        "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */
+        
+        "3: \n\t" /* inner loop */
+        "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */
+        
+        "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */
+        "ldr %[t0], [%[left], %[i]] \n\t"   /* t0 = left[i] */
+        
+        "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */
+        
+        "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
+        "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
+        "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
+
+        "adds %[i], #4 \n\t"          /* i += 4 */
+        "cmp %[i], %[last_word] \n\t" /* i > (num_words - 1) (times 4)? */
+        "bgt 4f \n\t"                 /*   if so, exit the loop */
+        "cmp %[i], %[k] \n\t"         /* i <= k? */
+        "ble 3b \n\t"                 /*   if so, continue looping */
+        
+        "4: \n\t" /* end inner loop */
+        
+        "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
+        "mov %[c0], %[c1] \n\t"       /* c0 = c1 */
+        "mov %[c1], %[c2] \n\t"       /* c1 = c2 */
+        "movs %[c2], #0 \n\t"         /* c2 = 0 */
+        "adds %[k], #4 \n\t"          /* k += 4 */
+        "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */
+        "ble 1b \n\t"                 /*   if so, loop back, start with i = 0 */
+        "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
+        "ble 2b \n\t"                 /*   if so, loop back, start with i = (k + 1) - num_words */
+        /* end outer loop */
+        
+        "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */
+        RESUME_SYNTAX
+        : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
+          [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
+        : [result] "r" (result), [left] "r" (left), [right] "r" (right),
+          [last_word] "r" ((num_words - 1) * 4)
+        : "cc", "memory"
+    );
+    
+#else /* Thumb-1 */
+    uint32_t r4, r5, r6, r7;
+
+    __asm__ volatile (
+        ".syntax unified \n\t"
+        "subs %[r3], #1 \n\t" /* r3 = num_words - 1 */
+        "lsls %[r3], #2 \n\t" /* r3 = (num_words - 1) * 4 */
+        "mov r8, %[r3] \n\t"  /* r8 = (num_words - 1) * 4 */
+        "lsls %[r3], #1 \n\t" /* r3 = (num_words - 1) * 8 */
+        "mov r9, %[r3] \n\t"  /* r9 = (num_words - 1) * 8 */
+        "movs %[r3], #0 \n\t" /* c0 = 0 */
+        "movs %[r4], #0 \n\t" /* c1 = 0 */
+        "movs %[r5], #0 \n\t" /* c2 = 0 */
+        "movs %[r6], #0 \n\t" /* k = 0 */
+        
+        "push {%[r0]} \n\t" /* keep result on the stack */
+        
+        "1: \n\t" /* outer loop (k < num_words) */
+        "movs %[r7], #0 \n\t" /* r7 = i = 0 */
+        "b 3f \n\t"
+        
+        "2: \n\t" /* outer loop (k >= num_words) */
+        "movs %[r7], %[r6] \n\t" /* r7 = k */
+        "mov %[r0], r8 \n\t"     /* r0 = (num_words - 1) * 4 */
+        "subs %[r7], %[r0] \n\t" /* r7 = i = k - (num_words - 1) (times 4) */
+        
+        "3: \n\t" /* inner loop */
+        "mov r10, %[r3] \n\t"
+        "mov r11, %[r4] \n\t"
+        "mov r12, %[r5] \n\t"
+        "mov r14, %[r6] \n\t"
+        "subs %[r0], %[r6], %[r7] \n\t"          /* r0 = k - i */
+        
+        "ldr %[r4], [%[r2], %[r0]] \n\t" /* r4 = right[k - i] */
+        "ldr %[r0], [%[r1], %[r7]] \n\t" /* r0 = left[i] */
+        
+        "lsrs %[r3], %[r0], #16 \n\t" /* r3 = a1 */
+        "uxth %[r0], %[r0] \n\t"      /* r0 = a0 */
+        
+        "lsrs %[r5], %[r4], #16 \n\t" /* r5 = b1 */
+        "uxth %[r4], %[r4] \n\t"      /* r4 = b0 */
+        
+        "movs %[r6], %[r3] \n\t"        /* r6 = a1 */
+        "muls %[r6], %[r5], %[r6] \n\t" /* r6 = a1 * b1 */
+        "muls %[r3], %[r4], %[r3] \n\t" /* r3 = b0 * a1 */
+        "muls %[r5], %[r0], %[r5] \n\t" /* r5 = a0 * b1 */
+        "muls %[r0], %[r4], %[r0] \n\t" /* r0 = a0 * b0 */
+        
+        /* Add middle terms */
+        "lsls %[r4], %[r3], #16 \n\t"
+        "lsrs %[r3], %[r3], #16 \n\t"
+        "adds %[r0], %[r4] \n\t"
+        "adcs %[r6], %[r3] \n\t"
+        
+        "lsls %[r4], %[r5], #16 \n\t"
+        "lsrs %[r5], %[r5], #16 \n\t"
+        "adds %[r0], %[r4] \n\t"
+        "adcs %[r6], %[r5] \n\t"
+        
+        "mov %[r3], r10\n\t"
+        "mov %[r4], r11\n\t"
+        "mov %[r5], r12\n\t"
+        "adds %[r3], %[r0] \n\t"         /* add low word to c0 */
+        "adcs %[r4], %[r6] \n\t"         /* add high word to c1, including carry */
+        "movs %[r0], #0 \n\t"            /* r0 = 0 (does not affect carry bit) */
+        "adcs %[r5], %[r0] \n\t"         /* add carry to c2 */
+        
+        "mov %[r6], r14\n\t" /* r6 = k */
+
+        "adds %[r7], #4 \n\t"   /* i += 4 */
+        "cmp %[r7], r8 \n\t"    /* i > (num_words - 1) (times 4)? */
+        "bgt 4f \n\t"           /*   if so, exit the loop */
+        "cmp %[r7], %[r6] \n\t" /* i <= k? */
+        "ble 3b \n\t"           /*   if so, continue looping */
+        
+        "4: \n\t" /* end inner loop */
+        
+        "ldr %[r0], [sp, #0] \n\t" /* r0 = result */
+        
+        "str %[r3], [%[r0], %[r6]] \n\t" /* result[k] = c0 */
+        "mov %[r3], %[r4] \n\t"          /* c0 = c1 */
+        "mov %[r4], %[r5] \n\t"          /* c1 = c2 */
+        "movs %[r5], #0 \n\t"            /* c2 = 0 */
+        "adds %[r6], #4 \n\t"            /* k += 4 */
+        "cmp %[r6], r8 \n\t"             /* k <= (num_words - 1) (times 4) ? */
+        "ble 1b \n\t"                    /*   if so, loop back, start with i = 0 */
+        "cmp %[r6], r9 \n\t"             /* k <= (num_words * 2 - 2) (times 4) ? */
+        "ble 2b \n\t"                    /*   if so, loop back, with i = (k + 1) - num_words */
+        /* end outer loop */
+        
+        "str %[r3], [%[r0], %[r6]] \n\t" /* result[num_words * 2 - 1] = c0 */
+        "pop {%[r0]} \n\t"               /* pop result off the stack */
+        
+        ".syntax divided \n\t"
+        : [r3] "+l" (num_words), [r4] "=&l" (r4),
+          [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7)
+        : [r0] "l" (result), [r1] "l" (left), [r2] "l" (right)
+        : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+    );
+#endif
+}
+#define asm_mult 1
+#endif
+
+#if uECC_SQUARE_FUNC
+#if !asm_square
+/* Computes result = left * left using inline assembly.
+ *
+ * result    - receives words result[0] .. result[2 * num_words - 1].
+ * left      - operand; num_words words are read from it.
+ * num_words - number of 32-bit words in left.
+ *
+ * Both variants walk the result one word (k) at a time and sum the partial
+ * products left[i] * left[k - i] into a three-word accumulator (c0, c1, c2).
+ * Only pairs with i <= k - i are visited; a product with i < k - i is added
+ * twice, which accounts for the mirrored pair that is skipped. All indices
+ * held in registers are byte offsets (word index times 4).
+ */
+uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
+                                  const uECC_word_t *left,
+                                  wordcount_t num_words) {
+#if (uECC_PLATFORM != uECC_arm_thumb)
+    /* Variant using umull for the 32x32 -> 64 bit product. */
+    uint32_t c0 = 0;
+    uint32_t c1 = 0;
+    uint32_t c2 = 0;
+    uint32_t k = 0;
+    uint32_t i, tt;
+    uint32_t t0, t1;
+    
+    __asm__ volatile (
+        ".syntax unified \n\t"
+        
+        "1: \n\t" /* outer loop (k < num_words) */
+        "movs %[i], #0 \n\t" /* i = 0 */
+        "b 3f \n\t"
+        
+        "2: \n\t" /* outer loop (k >= num_words) */
+        "movs %[i], %[k] \n\t"         /* i = k */
+        "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */
+        
+        "3: \n\t" /* inner loop */
+        "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
+        
+        "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = left[k - i] */
+        "ldr %[t0], [%[left], %[i]] \n\t"  /* t0 = left[i] */
+        
+        "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * left[k - i] */
+        
+        /* When i < k - i the product is accumulated twice: once here and
+           once more by falling through to label 4. */
+        "cmp %[i], %[tt] \n\t"      /* (i < k - i) ? */
+        "bge 4f \n\t"               /*   if i >= k - i, skip */
+        "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
+        "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
+        "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
+        
+        "4: \n\t"
+        "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
+        "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
+        "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
+        
+        "adds %[i], #4 \n\t"          /* i += 4 */
+        "cmp %[i], %[k] \n\t"         /* i >= k? */
+        "bge 5f \n\t"                 /*   if so, exit the loop */
+        "subs %[tt], %[k], %[i] \n\t" /* tt = k - i */
+        "cmp %[i], %[tt] \n\t"        /* i <= k - i? */
+        "ble 3b \n\t"                 /*   if so, continue looping */
+        
+        "5: \n\t" /* end inner loop */
+        
+        "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
+        "mov %[c0], %[c1] \n\t"       /* c0 = c1 */
+        "mov %[c1], %[c2] \n\t"       /* c1 = c2 */
+        "movs %[c2], #0 \n\t"         /* c2 = 0 */
+        "adds %[k], #4 \n\t"          /* k += 4 */
+        "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */
+        "ble 1b \n\t"                 /*   if so, loop back, start with i = 0 */
+        "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
+        "ble 2b \n\t"                 /*   if so, loop back, start with i = (k + 1) - num_words */
+        /* end outer loop */
+        
+        "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */
+        RESUME_SYNTAX
+        : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
+          [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
+        : [result] "r" (result), [left] "r" (left), [last_word] "r" ((num_words - 1) * 4)
+        : "cc", "memory"
+    );
+    
+#else
+    /* Variant for uECC_arm_thumb: the 32x32 product is assembled from four
+       16x16 muls, and r8-r12/r14 hold loop bounds and spilled values. */
+    uint32_t r3, r4, r5, r6, r7;
+
+    __asm__ volatile (
+        ".syntax unified \n\t"
+        "subs %[r2], #1 \n\t" /* r2 = num_words - 1 */
+        "lsls %[r2], #2 \n\t" /* r2 = (num_words - 1) * 4 */
+        "mov r8, %[r2] \n\t"  /* r8 = (num_words - 1) * 4 */
+        "lsls %[r2], #1 \n\t" /* r2 = (num_words - 1) * 8 */
+        "mov r9, %[r2] \n\t"  /* r9 = (num_words - 1) * 8 */
+        "movs %[r2], #0 \n\t" /* c0 = 0 */
+        "movs %[r3], #0 \n\t" /* c1 = 0 */
+        "movs %[r4], #0 \n\t" /* c2 = 0 */
+        "movs %[r5], #0 \n\t" /* k = 0 */
+        
+        /* r0 is reused as scratch below, so the result pointer is parked on
+           the stack and reloaded from [sp] whenever it is needed. */
+        "push {%[r0]} \n\t" /* keep result on the stack */
+        
+        "1: \n\t" /* outer loop (k < num_words) */
+        "movs %[r6], #0 \n\t" /* r6 = i = 0 */
+        "b 3f \n\t"
+        
+        "2: \n\t" /* outer loop (k >= num_words) */
+        "movs %[r6], %[r5] \n\t" /* r6 = k */
+        "mov %[r0], r8 \n\t"     /* r0 = (num_words - 1) * 4 */
+        "subs %[r6], %[r0] \n\t" /* r6 = i = k - (num_words - 1) (times 4) */
+        
+        "3: \n\t" /* inner loop */
+        /* Spill c0, c1, c2 and k so r2-r5 can serve as multiply scratch. */
+        "mov r10, %[r2] \n\t"
+        "mov r11, %[r3] \n\t"
+        "mov r12, %[r4] \n\t"
+        "mov r14, %[r5] \n\t"
+        "subs %[r7], %[r5], %[r6] \n\t"  /* r7 = k - i */
+        
+        "ldr %[r3], [%[r1], %[r7]] \n\t" /* r3 = left[k - i] */
+        "ldr %[r0], [%[r1], %[r6]] \n\t" /* r0 = left[i] */
+        
+        "lsrs %[r2], %[r0], #16 \n\t" /* r2 = a1 */
+        "uxth %[r0], %[r0] \n\t"      /* r0 = a0 */
+        
+        "lsrs %[r4], %[r3], #16 \n\t" /* r4 = b1 */
+        "uxth %[r3], %[r3] \n\t"      /* r3 = b0 */
+        
+        "movs %[r5], %[r2] \n\t"        /* r5 = a1 */
+        "muls %[r5], %[r4], %[r5] \n\t" /* r5 = a1 * b1 */
+        "muls %[r2], %[r3], %[r2] \n\t" /* r2 = b0 * a1 */
+        "muls %[r4], %[r0], %[r4] \n\t" /* r4 = a0 * b1 */
+        "muls %[r0], %[r3], %[r0] \n\t" /* r0 = a0 * b0 */
+        
+        /* Add middle terms */
+        "lsls %[r3], %[r2], #16 \n\t"
+        "lsrs %[r2], %[r2], #16 \n\t"
+        "adds %[r0], %[r3] \n\t"
+        "adcs %[r5], %[r2] \n\t"
+        
+        "lsls %[r3], %[r4], #16 \n\t"
+        "lsrs %[r4], %[r4], #16 \n\t"
+        "adds %[r0], %[r3] \n\t"
+        "adcs %[r5], %[r4] \n\t"
+        
+        /* Add to acc, doubling if necessary */
+        "mov %[r2], r10\n\t"
+        "mov %[r3], r11\n\t"
+        "mov %[r4], r12\n\t"
+        
+        "cmp %[r6], %[r7] \n\t"    /* (i < k - i) ? */
+        "bge 4f \n\t"            /*   if i >= k - i, skip */
+        "movs %[r7], #0 \n\t"    /* r7 = 0 */
+        "adds %[r2], %[r0] \n\t" /* add low word to c0 */
+        "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */
+        "adcs %[r4], %[r7] \n\t" /* add carry to c2 */
+        "4: \n\t"
+        "movs %[r7], #0 \n\t"    /* r7 = 0 */
+        "adds %[r2], %[r0] \n\t" /* add low word to c0 */
+        "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */
+        "adcs %[r4], %[r7] \n\t" /* add carry to c2 */
+        
+        "mov %[r5], r14\n\t" /* r5 = k */
+        
+        "adds %[r6], #4 \n\t"           /* i += 4 */
+        "cmp %[r6], %[r5] \n\t"         /* i >= k? */
+        "bge 5f \n\t"                   /*   if so, exit the loop */
+        "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */
+        "cmp %[r6], %[r7] \n\t"         /* i <= k - i? */
+        "ble 3b \n\t"                   /*   if so, continue looping */
+        
+        "5: \n\t" /* end inner loop */
+        
+        "ldr %[r0], [sp, #0] \n\t" /* r0 = result */
+        
+        "str %[r2], [%[r0], %[r5]] \n\t" /* result[k] = c0 */
+        "mov %[r2], %[r3] \n\t"          /* c0 = c1 */
+        "mov %[r3], %[r4] \n\t"          /* c1 = c2 */
+        "movs %[r4], #0 \n\t"            /* c2 = 0 */
+        "adds %[r5], #4 \n\t"            /* k += 4 */
+        "cmp %[r5], r8 \n\t"             /* k <= (num_words - 1) (times 4) ? */
+        "ble 1b \n\t"                    /*   if so, loop back, start with i = 0 */
+        "cmp %[r5], r9 \n\t"             /* k <= (num_words * 2 - 2) (times 4) ? */
+        "ble 2b \n\t"                    /*   if so, loop back, with i = (k + 1) - num_words */
+        /* end outer loop */
+        
+        "str %[r2], [%[r0], %[r5]] \n\t" /* result[num_words * 2 - 1] = c0 */
+        "pop {%[r0]} \n\t"               /* pop result off the stack */
+
+        ".syntax divided \n\t"
+        : [r2] "+l" (num_words), [r3] "=&l" (r3), [r4] "=&l" (r4),
+          [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7)
+        : [r0] "l" (result), [r1] "l" (left)
+        : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
+    );
+#endif
+}
+#define asm_square 1
+#endif
+#endif /* uECC_SQUARE_FUNC */
+
+#endif /* _UECC_ASM_ARM_H_ */
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm_mult_square.inc
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm_mult_square.inc
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm_mult_square_umaal.inc
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm_mult_square_umaal.inc
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_avr.inc
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_avr.inc
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_avr_mult_square.inc
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_avr_mult_square.inc
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/curve-specific.inc
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/curve-specific.inc
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/emk_project.py
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/emk_project.py
@@ -0,0 +1,127 @@
+import os
+
+c, link, asm, utils = emk.module("c", "link", "asm", "utils")
+
+default_compile_flags = ["-fvisibility=hidden", "-Wall", "-Wextra", "-Wshadow", "-Werror", "-Wno-missing-field-initializers", "-Wno-unused-parameter", \
+    "-Wno-comment", "-Wno-unused", "-Wno-unknown-pragmas"]
+default_link_flags = []
+opt_flags = {"dbg":["-g"], "std":["-O2"], "max":["-O3"], "small":["-Os"]}
+opt_link_flags = {"dbg":[], "std":[], "max":[], "small":[]}
+c_flags = ["-std=c99"]
+cxx_flags = ["-std=c++11", "-Wno-reorder", "-fno-rtti", "-fno-exceptions"]
+c_link_flags = []
+cxx_link_flags = ["-fno-rtti", "-fno-exceptions"]
+
+def setup_build_dir():
+    build_arch = None
+    if "arch" in emk.options:
+        build_arch = emk.options["arch"]
+    elif not emk.cleaning:
+        build_arch = "osx"
+    emk.options["arch"] = build_arch
+
+    opt_level = None
+    if "opt" in emk.options:
+        level = emk.options["opt"]
+        if level in opt_flags:
+            opt_level = level
+        else:
+            emk.log.warning("Unknown optimization level '%s'" % (level))
+    elif not emk.cleaning:
+        opt_level = "dbg"
+    emk.options["opt"] = opt_level
+
+    dirs = ["__build__"]
+    if build_arch:
+        dirs.append(build_arch)
+    if opt_level:
+        dirs.append(opt_level)
+    emk.build_dir = os.path.join(*dirs)
+
+def setup_osx():
+    global c
+    global link
+
+    flags = [("-arch", "x86_64"), "-fno-common", "-Wnewline-eof"]
+    c.flags.extend(flags)
+    c.cxx.flags += ["-stdlib=libc++"]
+    link.cxx.flags += ["-stdlib=libc++"]
+
+    link_flags = [("-arch", "x86_64")]
+    link.local_flags.extend(link_flags)
+
+def setup_avr():
+    global c
+    global link
+
+    c.compiler = c.GccCompiler("/Projects/avr-tools/bin/avr-")
+    c.flags += ["-mmcu=atmega256rfr2", "-ffunction-sections", "-fdata-sections"]
+    link.linker = link.GccLinker("/Projects/avr-tools/bin/avr-")
+    link.flags += ["-mmcu=atmega256rfr2", "-mrelax", "-Wl,--gc-sections"]
+    link.strip = True
+
+def setup_arm_thumb():
+    global c
+    global link
+    global asm
+    global utils
+
+    asm.assembler = asm.GccAssembler("/cross/arm_cortex/bin/arm-none-eabi-")
+    c.compiler = c.GccCompiler("/cross/arm_cortex/bin/arm-none-eabi-")
+    link.linker = link.GccLinker("/cross/arm_cortex/bin/arm-none-eabi-")
+
+    c.flags.extend(["-mcpu=cortex-m0", "-mthumb", "-ffunction-sections", "-fdata-sections", "-fno-builtin-fprintf", "-fno-builtin-printf"])
+    c.defines["LPC11XX"] = 1
+    
+    link.local_flags.extend(["-mcpu=cortex-m0", "-mthumb", "-nostartfiles", "-nostdlib", "-Wl,--gc-sections"])
+    link.local_flags.extend(["-Tflash.lds", "-L/Projects/lpc11xx/core", "/Projects/lpc11xx/core/" + emk.build_dir + "/board_cstartup.o"])
+    link.local_syslibs += ["gcc"]
+    link.depdirs += ["/Projects/lpc11xx/stdlib"]
+
+    def do_objcopy(produces, requires):
+        utils.call("/cross/arm_cortex/bin/arm-none-eabi-objcopy", "-O", "binary", requires[0], produces[0])
+
+    def handle_exe(path):
+        emk.depend(path, "/Projects/lpc11xx/core/" + emk.build_dir + "/board_cstartup.o")
+        emk.rule(do_objcopy, path + ".bin", path, cwd_safe=True, ex_safe=True)
+        emk.autobuild(path + ".bin")
+
+    link.exe_funcs.append(handle_exe)
+    link.strip = True
+    
+    emk.recurse("/Projects/lpc11xx/core")
+
+def setup_linux_rpi():
+    global c
+    global link
+
+    c.compiler = c.GccCompiler("/Volumes/xtools/arm-none-linux-gnueabi/bin/arm-none-linux-gnueabi-")
+    link.linker = link.GccLinker("/Volumes/xtools/arm-none-linux-gnueabi/bin/arm-none-linux-gnueabi-")
+    
+    c.flags.extend(["-fomit-frame-pointer"])
+
+setup_build_dir()
+
+setup_funcs = {"osx":setup_osx, "avr":setup_avr, "arm_thumb":setup_arm_thumb, "rpi": setup_linux_rpi}
+
+if not emk.cleaning:
+    build_arch = emk.options["arch"]
+    opt_level = emk.options["opt"]
+
+    c.flags.extend(default_compile_flags)
+    c.flags.extend(opt_flags[opt_level])
+    c.c.flags.extend(c_flags)
+    c.cxx.flags.extend(cxx_flags)
+    link.local_flags.extend(default_link_flags)
+    link.local_flags.extend(opt_link_flags[opt_level])
+    link.c.local_flags.extend(c_link_flags)
+    link.cxx.local_flags.extend(cxx_link_flags)
+
+    c.include_dirs.append("$:proj:$")
+
+    if build_arch in setup_funcs:
+        setup_funcs[build_arch]()
+    else:
+        raise emk.BuildError("Unknown target arch '%s'" % (build_arch))
+
+    c.defines["TARGET_ARCH_" + build_arch.upper()] = 1
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/emk_rules.py
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/emk_rules.py
@@ -0,0 +1,3 @@
+# emk rules for this directory: request the "c" and "link" emk modules
+# (the returned handles are not used further in this file).
+c, link = emk.module("c", "link")
+
+# Hand the "test" subdirectory to emk as well (presumably so it is built
+# together with this directory -- confirm against the emk documentation).
+emk.subdir("test")
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/examples/ecc_test/ecc_test.ino
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/examples/ecc_test/ecc_test.ino
@@ -0,0 +1,85 @@
+#include <uECC.h>
+
+extern "C" {
+
+// Fills dest[0..size) with bits harvested from analog pin 0 and returns 1.
+//
+// Each bit comes from one sample run: the pin is read once, then re-read until
+// the value changes. If it changed immediately the low bit of the first sample
+// is used, otherwise the low bit of the number of repeated reads. This blocks
+// for as long as analogRead(0) keeps returning the same value.
+static int RNG(uint8_t *dest, unsigned size) {
+  for (; size != 0; --size, ++dest) {
+    uint8_t byte = 0;
+    for (unsigned bit = 0; bit < 8; ++bit) {
+      const int first = analogRead(0);
+      int repeats = 0;
+      while (analogRead(0) == first) {
+        ++repeats;
+      }
+
+      const int source = (repeats == 0) ? first : repeats;
+      byte = (byte << 1) | (source & 0x01);
+    }
+    *dest = byte;
+  }
+  // NOTE: it would be a good idea to hash the resulting random data using SHA-256 or similar.
+  return 1;
+}
+
+}  // extern "C"
+
+// One-time initialisation: open the serial port, print a banner and install
+// the ADC-based RNG() callback as micro-ecc's random source.
+void setup() {
+  Serial.begin(115200);
+  Serial.print("Testing ecc\n");
+
+  uECC_set_rng(RNG);
+}
+
+// Runs one ECDH round trip on secp160r1 and reports timings over Serial:
+// generates two key pairs, derives the shared secret from each side and
+// checks that both secrets match. Any failing uECC call is reported and
+// aborts the current iteration, so later steps never run on buffers that
+// were not filled in.
+void loop() {
+  const struct uECC_Curve_t * curve = uECC_secp160r1();
+  uint8_t private1[21];
+  uint8_t private2[21];
+  
+  uint8_t public1[40];
+  uint8_t public2[40];
+  
+  uint8_t secret1[20];
+  uint8_t secret2[20];
+  
+  unsigned long a = millis();
+  int r = uECC_make_key(public1, private1, curve);
+  unsigned long b = millis();
+  
+  Serial.print("Made key 1 in "); Serial.println(b-a);
+  // uECC_make_key() reports failure (e.g. the RNG callback failed) with 0.
+  if (!r) {
+    Serial.print("make_key() failed (1)\n");
+    return;
+  }
+
+  a = millis();
+  r = uECC_make_key(public2, private2, curve);
+  b = millis();
+  Serial.print("Made key 2 in "); Serial.println(b-a);
+  if (!r) {
+    Serial.print("make_key() failed (2)\n");
+    return;
+  }
+
+  a = millis();
+  r = uECC_shared_secret(public2, private1, secret1, curve);
+  b = millis();
+  Serial.print("Shared secret 1 in "); Serial.println(b-a);
+  if (!r) {
+    Serial.print("shared_secret() failed (1)\n");
+    return;
+  }
+
+  a = millis();
+  r = uECC_shared_secret(public1, private2, secret2, curve);
+  b = millis();
+  Serial.print("Shared secret 2 in "); Serial.println(b-a);
+  if (!r) {
+    Serial.print("shared_secret() failed (2)\n");
+    return;
+  }
+    
+  // Both sides must arrive at the same 20-byte secret.
+  if (memcmp(secret1, secret2, 20) != 0) {
+    Serial.print("Shared secrets are not identical!\n");
+  } else {
+    Serial.print("Shared secrets are identical\n");
+  }
+}
+
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/library.properties
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/library.properties
@@ -0,0 +1,9 @@
+name=micro-ecc
+version=1.0.0
+author=Kenneth MacKay
+maintainer=Kenneth MacKay
+sentence=uECC
+paragraph=A small and fast ECDH and ECDSA implementation for 8-bit, 32-bit, and 64-bit processors.
+category=Other
+url=https://github.com/kmackay/micro-ecc
+architectures=*
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/platform-specific.inc
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/platform-specific.inc
@@ -0,0 +1,71 @@
+/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#ifndef _UECC_PLATFORM_SPECIFIC_H_
+#define _UECC_PLATFORM_SPECIFIC_H_
+
+#include "types.h"
+
+#if (defined(_WIN32) || defined(_WIN64))
+/* Windows */
+
+// use pragma syntax to prevent tweaking the linker script for getting CryptXYZ function
+#pragma comment(lib, "crypt32.lib")
+#pragma comment(lib, "advapi32.lib")
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <wincrypt.h>
+
+/* Default RNG for Windows: fills dest[0..size) from the CryptoAPI generator.
+ *
+ * Returns 1 on success and 0 if either the provider could not be acquired or
+ * CryptGenRandom() itself failed. The provider handle is always released
+ * once it has been acquired.
+ */
+static int default_RNG(uint8_t *dest, unsigned size) {
+    HCRYPTPROV prov;
+    int ok;
+
+    if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
+        return 0;
+    }
+
+    /* Do not report success unless the buffer was actually filled: a failed
+       CryptGenRandom() would otherwise leave dest with whatever it held. */
+    ok = CryptGenRandom(prov, size, (BYTE *)dest) ? 1 : 0;
+    CryptReleaseContext(prov, 0);
+    return ok;
+}
+#define default_RNG_defined 1
+
+#elif defined(unix) || defined(__linux__) || defined(__unix__) || defined(__unix) || \
+    (defined(__APPLE__) && defined(__MACH__)) || defined(uECC_POSIX)
+
+/* Some POSIX-like system with /dev/urandom or /dev/random. */
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#ifndef O_CLOEXEC
+    #define O_CLOEXEC 0
+#endif
+
+/* Default RNG for POSIX-like systems: fills dest[0..size) from /dev/urandom,
+ * falling back to /dev/random if the former cannot be opened.
+ *
+ * Returns 1 once all bytes have been read, 0 if neither device could be
+ * opened or a read() returned an error or end-of-file.
+ */
+static int default_RNG(uint8_t *dest, unsigned size) {
+    int fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
+    if (fd == -1) {
+        fd = open("/dev/random", O_RDONLY | O_CLOEXEC);
+    }
+    if (fd == -1) {
+        return 0;
+    }
+
+    char *cursor = (char *)dest;
+    size_t remaining = size;
+    int ok = 1;
+
+    /* read() may deliver fewer bytes than requested, so loop until done. */
+    while (remaining > 0) {
+        ssize_t got = read(fd, cursor, remaining);
+        if (got <= 0) { /* read failed */
+            ok = 0;
+            break;
+        }
+        remaining -= got;
+        cursor += got;
+    }
+
+    close(fd);
+    return ok;
+}
+#define default_RNG_defined 1
+
+#endif /* platform */
+
+#endif /* _UECC_PLATFORM_SPECIFIC_H_ */
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_arm.py
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_arm.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Provide the integer size in 32-bit words"
+    sys.exit(1)
+
+size = int(sys.argv[1])
+
+full_rows = size // 3
+init_size = size % 3
+
+if init_size == 0:
+    full_rows = full_rows - 1
+    init_size = 3
+
+def emit(line, *args):
+    s = '"' + line + r' \n\t"'
+    print s % args
+
+rx = [3, 4, 5]
+ry = [6, 7, 8]
+
+#### set up registers
+emit("add r0, %s", (size - init_size) * 4) # move z
+emit("add r2, %s", (size - init_size) * 4) # move y
+
+emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size)]))
+emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(init_size)]))
+
+print ""
+if init_size == 1:
+    emit("umull r9, r10, r3, r6")
+    emit("stmia r0!, {r9, r10}")
+else:
+    #### first two multiplications of initial block
+    emit("umull r11, r12, r3, r6")
+    emit("stmia r0!, {r11}")
+    print ""
+    emit("mov r10, #0")
+    emit("umull r11, r9, r3, r7")
+    emit("adds r12, r12, r11")
+    emit("adc r9, r9, #0")
+    emit("umull r11, r14, r4, r6")
+    emit("adds r12, r12, r11")
+    emit("adcs r9, r9, r14")
+    emit("adc r10, r10, #0")
+    emit("stmia r0!, {r12}")
+    print ""
+
+    #### rest of initial block, with moving accumulator registers
+    acc = [9, 10, 11, 12, 14]
+    if init_size == 3:
+        emit("mov r%s, #0", acc[2])
+        for i in xrange(0, 3):
+            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
+            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+            emit("adc r%s, r%s, #0", acc[2], acc[2])
+        emit("stmia r0!, {r%s}", acc[0])
+        print ""
+        acc = acc[1:] + acc[:1]
+
+        emit("mov r%s, #0", acc[2])
+        for i in xrange(0, 2):
+            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i + 1], ry[2 - i])
+            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+            emit("adc r%s, r%s, #0", acc[2], acc[2])
+        emit("stmia r0!, {r%s}", acc[0])
+        print ""
+        acc = acc[1:] + acc[:1]
+    
+    emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[init_size-1], ry[init_size-1])
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+    emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
+    emit("stmia r0!, {r%s}", acc[0])
+    emit("stmia r0!, {r%s}", acc[1])
+print ""
+
+#### reset y and z pointers
+emit("sub r0, %s", (2 * init_size + 3) * 4)
+emit("sub r2, %s", (init_size + 3) * 4)
+
+#### load y registers
+emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(3)]))
+
+#### load additional x registers
+if init_size != 3:
+    emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size, 3)]))
+print ""
+
+prev_size = init_size
+for row in xrange(full_rows):
+    emit("umull r11, r12, r3, r6")
+    emit("stmia r0!, {r11}")
+    print ""
+    emit("mov r10, #0")
+    emit("umull r11, r9, r3, r7")
+    emit("adds r12, r12, r11")
+    emit("adc r9, r9, #0")
+    emit("umull r11, r14, r4, r6")
+    emit("adds r12, r12, r11")
+    emit("adcs r9, r9, r14")
+    emit("adc r10, r10, #0")
+    emit("stmia r0!, {r12}")
+    print ""
+
+    acc = [9, 10, 11, 12, 14]
+    emit("mov r%s, #0", acc[2])
+    for i in xrange(0, 3):
+        emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
+    emit("stmia r0!, {r%s}", acc[0])
+    print ""
+    acc = acc[1:] + acc[:1]
+
+    #### now we need to start shifting x and loading from z
+    x_regs = [3, 4, 5]
+    for r in xrange(0, prev_size):
+        x_regs = x_regs[1:] + x_regs[:1]
+        emit("ldmia r1!, {r%s}", x_regs[2])
+        emit("mov r%s, #0", acc[2])
+        for i in xrange(0, 3):
+            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], ry[2 - i])
+            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+            emit("adc r%s, r%s, #0", acc[2], acc[2])
+        emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+        emit("adcs r%s, r%s, #0", acc[1], acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
+        emit("stmia r0!, {r%s}", acc[0])
+        print ""
+        acc = acc[1:] + acc[:1]
+
+    # done shifting x, start shifting y
+    y_regs = [6, 7, 8]
+    for r in xrange(0, prev_size):
+        y_regs = y_regs[1:] + y_regs[:1]
+        emit("ldmia r2!, {r%s}", y_regs[2])
+        emit("mov r%s, #0", acc[2])
+        for i in xrange(0, 3):
+            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], y_regs[2 - i])
+            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+            emit("adc r%s, r%s, #0", acc[2], acc[2])
+        emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+        emit("adcs r%s, r%s, #0", acc[1], acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
+        emit("stmia r0!, {r%s}", acc[0])
+        print ""
+        acc = acc[1:] + acc[:1]
+
+    # done both shifts, do remaining corner
+    emit("mov r%s, #0", acc[2])
+    for i in xrange(0, 2):
+        emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i + 1], y_regs[2 - i])
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
+    emit("stmia r0!, {r%s}", acc[0])
+    print ""
+    acc = acc[1:] + acc[:1]
+    
+    emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[2], y_regs[2])
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
+    emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
+    emit("stmia r0!, {r%s}", acc[0])
+    emit("stmia r0!, {r%s}", acc[1])
+    print ""
+    
+    prev_size = prev_size + 3
+    if row < full_rows - 1:
+        #### reset x, y and z pointers
+        emit("sub r0, %s", (2 * prev_size + 3) * 4)
+        emit("sub r1, %s", prev_size * 4)
+        emit("sub r2, %s", (prev_size + 3) * 4)
+
+        #### load x and y registers
+        emit("ldmia r1!, {%s}", ",".join(["r%s" % (rx[i]) for i in xrange(3)]))
+        emit("ldmia r2!, {%s}", ",".join(["r%s" % (ry[i]) for i in xrange(3)]))
+        
+        print ""
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_avr.py
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_avr.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Provide the integer size in bytes"
+    sys.exit(1)
+
+size = int(sys.argv[1])
+
+full_rows = size // 10
+init_size = size % 10
+
+if init_size == 0:
+    full_rows = full_rows - 1
+    init_size = 10
+
+def rx(i):
+    return i + 2
+
+def ry(i):
+    return i + 12
+
+def emit(line, *args):
+    s = '"' + line + r' \n\t"'
+    print s % args
+
+#### set up registers
+emit("adiw r30, %s", size - init_size) # move z
+emit("adiw r28, %s", size - init_size) # move y
+
+for i in xrange(init_size):
+    emit("ld r%s, x+", rx(i))
+for i in xrange(init_size):
+    emit("ld r%s, y+", ry(i))
+
+emit("ldi r25, 0")
+print ""
+if init_size == 1:
+    emit("mul r2, r12")
+    emit("st z+, r0")
+    emit("st z+, r1")
+else:
+    #### first two multiplications of initial block
+    emit("ldi r23, 0")
+    emit("mul r2, r12")
+    emit("st z+, r0")
+    emit("mov r22, r1")
+    print ""
+    emit("ldi r24, 0")
+    emit("mul r2, r13")
+    emit("add r22, r0")
+    emit("adc r23, r1")
+    emit("mul r3, r12")
+    emit("add r22, r0")
+    emit("adc r23, r1")
+    emit("adc r24, r25")
+    emit("st z+, r22")
+    print ""
+
+    #### rest of initial block, with moving accumulator registers
+    acc = [23, 24, 22]
+    for r in xrange(2, init_size):
+        emit("ldi r%s, 0", acc[2])
+        for i in xrange(0, r+1):
+            emit("mul r%s, r%s", rx(i), ry(r - i))
+            emit("add r%s, r0", acc[0])
+            emit("adc r%s, r1", acc[1])
+            emit("adc r%s, r25", acc[2])
+        emit("st z+, r%s", acc[0])
+        print ""
+        acc = acc[1:] + acc[:1]
+    for r in xrange(1, init_size-1):
+        emit("ldi r%s, 0", acc[2])
+        for i in xrange(0, init_size-r):
+            emit("mul r%s, r%s", rx(r+i), ry((init_size-1) - i))
+            emit("add r%s, r0", acc[0])
+            emit("adc r%s, r1", acc[1])
+            emit("adc r%s, r25", acc[2])
+        emit("st z+, r%s", acc[0])
+        print ""
+        acc = acc[1:] + acc[:1]
+    emit("mul r%s, r%s", rx(init_size-1), ry(init_size-1))
+    emit("add r%s, r0", acc[0])
+    emit("adc r%s, r1", acc[1])
+    emit("st z+, r%s", acc[0])
+    emit("st z+, r%s", acc[1])
+print ""
+
+#### reset y and z pointers
+emit("sbiw r30, %s", 2 * init_size + 10)
+emit("sbiw r28, %s", init_size + 10)
+
+#### load y registers
+for i in xrange(10):
+    emit("ld r%s, y+", ry(i))
+
+#### load additional x registers
+for i in xrange(init_size, 10):
+    emit("ld r%s, x+", rx(i))
+print ""
+
+prev_size = init_size
+for row in xrange(full_rows):
+    #### do x = 0-9, y = 0-9 multiplications
+    emit("ldi r23, 0")
+    emit("mul r2, r12")
+    emit("st z+, r0")
+    emit("mov r22, r1")
+    print ""
+    emit("ldi r24, 0")
+    emit("mul r2, r13")
+    emit("add r22, r0")
+    emit("adc r23, r1")
+    emit("mul r3, r12")
+    emit("add r22, r0")
+    emit("adc r23, r1")
+    emit("adc r24, r25")
+    emit("st z+, r22")
+    print ""
+
+    acc = [23, 24, 22]
+    for r in xrange(2, 10):
+        emit("ldi r%s, 0", acc[2])
+        for i in xrange(0, r+1):
+            emit("mul r%s, r%s", rx(i), ry(r - i))
+            emit("add r%s, r0", acc[0])
+            emit("adc r%s, r1", acc[1])
+            emit("adc r%s, r25", acc[2])
+        emit("st z+, r%s", acc[0])
+        print ""
+        acc = acc[1:] + acc[:1]
+
+    #### now we need to start shifting x and loading from z
+    x_regs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+    for r in xrange(0, prev_size):
+        x_regs = x_regs[1:] + x_regs[:1]
+        emit("ld r%s, x+", x_regs[9]) # load next byte of left
+        emit("ldi r%s, 0", acc[2])
+        for i in xrange(0, 10):
+            emit("mul r%s, r%s", x_regs[i], ry(9 - i))
+            emit("add r%s, r0", acc[0])
+            emit("adc r%s, r1", acc[1])
+            emit("adc r%s, r25", acc[2])
+        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
+        emit("add r%s, r0", acc[0])
+        emit("adc r%s, r25", acc[1])
+        emit("adc r%s, r25", acc[2])
+        emit("st z+, r%s", acc[0]) # store next byte (z increments)
+        print ""
+        acc = acc[1:] + acc[:1]
+
+    # done shifting x, start shifting y
+    y_regs = [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+    for r in xrange(0, prev_size):
+        y_regs = y_regs[1:] + y_regs[:1]
+        emit("ld r%s, y+", y_regs[9]) # load next byte of right
+        emit("ldi r%s, 0", acc[2])
+        for i in xrange(0, 10):
+            emit("mul r%s, r%s", x_regs[i], y_regs[9 -i])
+            emit("add r%s, r0", acc[0])
+            emit("adc r%s, r1", acc[1])
+            emit("adc r%s, r25", acc[2])
+        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
+        emit("add r%s, r0", acc[0])
+        emit("adc r%s, r25", acc[1])
+        emit("adc r%s, r25", acc[2])
+        emit("st z+, r%s", acc[0]) # store next byte (z increments)
+        print ""
+        acc = acc[1:] + acc[:1]
+
+    # done both shifts, do remaining corner
+    for r in xrange(1, 9):
+        emit("ldi r%s, 0", acc[2])
+        for i in xrange(0, 10-r):
+            emit("mul r%s, r%s", x_regs[r+i], y_regs[9 - i])
+            emit("add r%s, r0", acc[0])
+            emit("adc r%s, r1", acc[1])
+            emit("adc r%s, r25", acc[2])
+        emit("st z+, r%s", acc[0])
+        print ""
+        acc = acc[1:] + acc[:1]
+    emit("mul r%s, r%s", x_regs[9], y_regs[9])
+    emit("add r%s, r0", acc[0])
+    emit("adc r%s, r1", acc[1])
+    emit("st z+, r%s", acc[0])
+    emit("st z+, r%s", acc[1])
+    print ""
+    
+    prev_size = prev_size + 10
+    if row < full_rows - 1:
+        #### reset x, y and z pointers
+        emit("sbiw r30, %s", 2 * prev_size + 10)
+        emit("sbiw r28, %s", prev_size + 10)
+        emit("sbiw r26, %s", prev_size)
+
+        #### load x and y registers
+        for i in xrange(10):
+            emit("ld r%s, x+", rx(i))
+            emit("ld r%s, y+", ry(i))
+        print ""
+
+emit("eor r1, r1")
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_avr_extra.py
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_avr_extra.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Provide the integer size in bytes"
+    sys.exit(1)
+
+size = int(sys.argv[1])
+
+def lhi(i):  # register number (r2..r5) of "high" left byte i, loaded through X
+    return i + 2
+
+def rhi(i):  # register number (r6..r9) of "high" right byte i, loaded through Y
+    return i + 6
+
+left_lo = [10, 11, 12, 13]  # rotating window of registers for "low" left bytes (see update_low)
+right_lo = [14, 15, 16, 17]  # rotating window of registers for "low" right bytes (see update_low)
+
+def llo(i):  # register number at position i of the current left low window
+    return left_lo[i]
+
+def rlo(i):  # register number at position i of the current right low window
+    return right_lo[i]
+
+def emit(line, *args):
+    s = '"' + line + r' \n\t"'
+    print s % args
+
+def update_low():
+    global left_lo
+    global right_lo
+    left_lo = left_lo[1:] + left_lo[:1]
+    right_lo = right_lo[1:] + right_lo[:1]
+    emit("ld r%s, x+", left_lo[3])
+    emit("ld r%s, y+", right_lo[3])
+
+accum = [19, 20, 21]  # registers of the 3-byte column accumulator, least-significant byte first
+
+def acc(i):  # register number currently acting as accumulator byte i
+    return accum[i]
+
+def rotate_acc():  # called after acc(0) is stored: old byte 1 becomes byte 0, old byte 0 becomes byte 2
+    global accum
+    accum = accum[1:] + accum[:1]
+
+# Load high values
+for i in xrange(4):
+    emit("ld r%s, x+", lhi(i))
+    emit("ld r%s, y+", rhi(i))
+
+emit("sbiw r26, %s", size + 4)
+emit("sbiw r28, %s", size + 4)
+emit("sbiw r30, %s", size)
+
+# Load low values
+for i in xrange(4):
+    emit("ld r%s, x+", llo(i))
+    emit("ld r%s, y+", rlo(i))
+print ""
+
+# Compute initial triangles
+emit("mul r%s, r%s", lhi(0), rlo(0))
+emit("mov r%s, r0", acc(0))
+emit("mov r%s, r1", acc(1))
+emit("ldi r%s, 0", acc(2))
+emit("ld r0, z")
+emit("add r%s, r0", acc(0))
+emit("adc r%s, r25", acc(1))
+emit("mul r%s, r%s", rhi(0), llo(0))
+emit("add r%s, r0", acc(0))
+emit("adc r%s, r1", acc(1))
+emit("adc r%s, r25", acc(2))
+emit("st z+, r%s", acc(0))
+print ""
+rotate_acc()
+
+for i in xrange(1, 4):
+    emit("ldi r%s, 0", acc(2))
+    emit("ld r0, z")
+    emit("add r%s, r0", acc(0))
+    emit("adc r%s, r25", acc(1))
+    for j in xrange(i + 1):
+        emit("mul r%s, r%s", lhi(j), rlo(i-j))
+        emit("add r%s, r0", acc(0))
+        emit("adc r%s, r1", acc(1))
+        emit("adc r%s, r25", acc(2))
+        emit("mul r%s, r%s", rhi(j), llo(i-j))
+        emit("add r%s, r0", acc(0))
+        emit("adc r%s, r1", acc(1))
+        emit("adc r%s, r25", acc(2))
+    emit("st z+, r%s", acc(0))
+    print ""
+    rotate_acc()
+
+# Compute rows overlapping old block
+for i in xrange(4, size):
+    emit("ldi r%s, 0", acc(2))
+    emit("ld r0, z")
+    emit("add r%s, r0", acc(0))
+    emit("adc r%s, r25", acc(1))
+    update_low()
+    for j in xrange(4):
+        emit("mul r%s, r%s", lhi(j), rlo(3-j))
+        emit("add r%s, r0", acc(0))
+        emit("adc r%s, r1", acc(1))
+        emit("adc r%s, r25", acc(2))
+        emit("mul r%s, r%s", rhi(j), llo(3-j))
+        emit("add r%s, r0", acc(0))
+        emit("adc r%s, r1", acc(1))
+        emit("adc r%s, r25", acc(2))
+    emit("st z+, r%s", acc(0))
+    print ""
+    rotate_acc()
+
+# Compute new triangle: three low-window registers followed by the four high registers per operand
+left_combined = [llo(1), llo(2), llo(3), lhi(0), lhi(1), lhi(2), lhi(3)]  # llo() is resolved here, after the loop's last rotation
+right_combined = [rlo(1), rlo(2), rlo(3), rhi(0), rhi(1), rhi(2), rhi(3)]  # same layout for the right operand
+
+def left(i):  # register number at position i of left_combined
+    return left_combined[i]
+
+def right(i):  # register number at position i of right_combined
+    return right_combined[i]
+
+for i in xrange(6):
+    emit("ldi r%s, 0", acc(2))
+    for j in xrange(7 - i):
+        emit("mul r%s, r%s", left(i+j), right(6-j))
+        emit("add r%s, r0", acc(0))
+        emit("adc r%s, r1", acc(1))
+        emit("adc r%s, r25", acc(2))
+    emit("st z+, r%s", acc(0))
+    print ""
+    rotate_acc()
+
+emit("mul r%s, r%s", left(6), right(6))
+emit("add r%s, r0", acc(0))
+emit("adc r%s, r1", acc(1))
+emit("st z+, r%s", acc(0))
+emit("st z+, r%s", acc(1))
+emit("adiw r26, 4")
+emit("adiw r28, 4")
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/square_arm.py
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/square_arm.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Provide the integer size in 32-bit words"
+    sys.exit(1)
+
+size = int(sys.argv[1])
+
+if size > 8:
+    print "This script doesn't work with integer size %s due to laziness" % (size)
+    sys.exit(1)
+
+init_size = 0
+if size > 6:
+    init_size = size - 6
+
+def emit(line, *args):
+    s = '"' + line + r' \n\t"'
+    print s % args
+
+def mulacc(acc, r1, r2):
+    if size <= 6:
+        emit("umull r1, r14, r%s, r%s", r1, r2)
+        emit("adds r%s, r%s, r1", acc[0], acc[0])
+        emit("adcs r%s, r%s, r14", acc[1], acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
+    else:
+        emit("mov r14, r%s", acc[1])
+        emit("umlal r%s, r%s, r%s, r%s", acc[0], acc[1], r1, r2)
+        emit("cmp r14, r%s", acc[1])
+        emit("it hi")
+        emit("adchi r%s, r%s, #0", acc[2], acc[2])
+
+r = [2, 3, 4, 5, 6, 7]
+
+s = size - init_size
+
+if init_size == 1:
+    emit("ldmia r1!, {r2}")
+    emit("add r1, %s", (size - init_size * 2) * 4)
+    emit("ldmia r1!, {r5}")
+    
+    emit("add r0, %s", (size - init_size) * 4)
+    emit("umull r8, r9, r2, r5")
+    emit("stmia r0!, {r8, r9}")
+    
+    emit("sub r0, %s", (size + init_size) * 4)
+    emit("sub r1, %s", (size) * 4)
+    print ""
+elif init_size == 2:
+    emit("ldmia r1!, {r2, r3}")
+    emit("add r1, %s", (size - init_size * 2) * 4)
+    emit("ldmia r1!, {r5, r6}")
+    
+    emit("add r0, %s", (size - init_size) * 4)
+    print ""
+
+    emit("umull r8, r9, r2, r5")
+    emit("stmia r0!, {r8}")
+    print ""
+    
+    emit("umull r12, r10, r2, r6")
+    emit("adds r9, r9, r12")
+    emit("adc r10, r10, #0")
+    emit("stmia r0!, {r9}")
+    print ""
+    
+    emit("umull r8, r9, r3, r6")
+    emit("adds r10, r10, r8")
+    emit("adc r11, r9, #0")
+    emit("stmia r0!, {r10, r11}")
+    print ""
+    
+    emit("sub r0, %s", (size + init_size) * 4)
+    emit("sub r1, %s", (size) * 4)
+
+# load input words
+emit("ldmia r1!, {%s}", ", ".join(["r%s" % (r[i]) for i in xrange(s)]))
+print ""
+
+emit("umull r11, r12, r2, r2")
+emit("stmia r0!, {r11}")
+print ""
+emit("mov r9, #0")
+emit("umull r10, r11, r2, r3")
+emit("adds r12, r12, r10")
+emit("adcs r8, r11, #0")
+emit("adc r9, r9, #0")
+emit("adds r12, r12, r10")
+emit("adcs r8, r8, r11")
+emit("adc r9, r9, #0")
+emit("stmia r0!, {r12}")
+print ""
+emit("mov r10, #0")
+emit("umull r11, r12, r2, r4")
+emit("adds r11, r11, r11")
+emit("adcs r12, r12, r12")
+emit("adc r10, r10, #0")
+emit("adds r8, r8, r11")
+emit("adcs r9, r9, r12")
+emit("adc r10, r10, #0")
+emit("umull r11, r12, r3, r3")
+emit("adds r8, r8, r11")
+emit("adcs r9, r9, r12")
+emit("adc r10, r10, #0")
+emit("stmia r0!, {r8}")
+print ""
+
+acc = [8, 9, 10]
+old_acc = [11, 12]
+for i in xrange(3, s):
+    emit("mov r%s, #0", old_acc[1])
+    tmp = [acc[1], acc[2]]
+    acc = [acc[0], old_acc[0], old_acc[1]]
+    old_acc = tmp
+    
+    # gather non-equal words
+    emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], r[0], r[i])
+    for j in xrange(1, (i+1)//2):
+        mulacc(acc, r[j], r[i-j])
+    # multiply by 2
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+    emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+    emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
+    
+    # add equal word (if any)
+    if ((i+1) % 2) != 0:
+        mulacc(acc, r[i//2], r[i//2])
+    
+    # add old accumulator
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+    emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+    emit("adc r%s, r%s, #0", acc[2], acc[2])
+    
+    # store
+    emit("stmia r0!, {r%s}", acc[0])
+    print ""
+
+regs = list(r)
+for i in xrange(init_size):
+    regs = regs[1:] + regs[:1]
+    emit("ldmia r1!, {r%s}", regs[5])
+    
+    for limit in [4, 5]:
+        emit("mov r%s, #0", old_acc[1])
+        tmp = [acc[1], acc[2]]
+        acc = [acc[0], old_acc[0], old_acc[1]]
+        old_acc = tmp
+    
+        # gather non-equal words
+        emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], regs[0], regs[limit])
+        for j in xrange(1, (limit+1)//2):
+            mulacc(acc, regs[j], regs[limit-j])
+    
+        emit("ldr r14, [r0]") # load stored value from initial block, and add to accumulator
+        emit("adds r%s, r%s, r14", acc[0], acc[0])
+        emit("adcs r%s, r%s, #0", acc[1], acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
+    
+        # multiply by 2
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+        emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
+    
+        # add equal word
+        if limit == 4:
+            mulacc(acc, regs[2], regs[2])
+    
+        # add old accumulator
+        emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+        emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+        emit("adc r%s, r%s, #0", acc[2], acc[2])
+    
+        # store
+        emit("stmia r0!, {r%s}", acc[0])
+        print ""
+
+for i in xrange(1, s-3):
+    emit("mov r%s, #0", old_acc[1])
+    tmp = [acc[1], acc[2]]
+    acc = [acc[0], old_acc[0], old_acc[1]]
+    old_acc = tmp
+
+    # gather non-equal words
+    emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], regs[i], regs[s - 1])
+    for j in xrange(1, (s-i)//2):
+        mulacc(acc, regs[i+j], regs[s - 1 - j])
+
+    # multiply by 2
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
+    emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
+    emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
+
+    # add equal word (if any)
+    if ((s-i) % 2) != 0:
+        mulacc(acc, regs[i + (s-i)//2], regs[i + (s-i)//2])
+
+    # add old accumulator
+    emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
+    emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+    emit("adc r%s, r%s, #0", acc[2], acc[2])
+
+    # store
+    emit("stmia r0!, {r%s}", acc[0])
+    print ""
+
+acc = acc[1:] + acc[:1]
+emit("mov r%s, #0", acc[2])
+emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 3], regs[s - 1])
+emit("adds r1, r1, r1")
+emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("stmia r0!, {r%s}", acc[0])
+print ""
+
+acc = acc[1:] + acc[:1]
+emit("mov r%s, #0", acc[2])
+emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 1])
+emit("adds r1, r1, r1")
+emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("adc r%s, r%s, #0", acc[2], acc[2])
+emit("stmia r0!, {r%s}", acc[0])
+print ""
+
+acc = acc[1:] + acc[:1]
+emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 1], regs[s - 1])
+emit("adds r%s, r%s, r1", acc[0], acc[0])
+emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
+emit("stmia r0!, {r%s}", acc[0])
+emit("stmia r0!, {r%s}", acc[1])
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/square_avr.py
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/square_avr.py
@@ -0,0 +1,327 @@
+#!/usr/bin/env python
+
+import sys
+
+if len(sys.argv) < 2:
+    print "Provide the integer size in bytes"
+    sys.exit(1)
+
+size = int(sys.argv[1])
+
+if size > 40:
+    print "This script doesn't work with integer size %s due to laziness" % (size)
+    sys.exit(1)
+
+init_size = size - 20
+if size < 20:
+    init_size = 0
+
+def rg(i):  # register number (r2 upward) for byte i of the main block, loaded through X
+    return i + 2
+
+def lo(i):  # register number (r2 upward) for byte i of the initial block's low end, loaded through X
+    return i + 2
+
+def hi(i):  # register number (r12 upward) for byte i of the initial block's high end, loaded through Y
+    return i + 12
+
+def emit(line, *args):  # print line % args wrapped as a C string literal ending in a literal \n\t
+    s = '"' + line + r' \n\t"'
+    print s % args
+
+#### set up registers
+zero = "r25"
+emit("ldi %s, 0", zero) # zero register
+
+if init_size > 0:
+    emit("movw r28, r26") # y = x
+    h = (init_size + 1)//2
+    
+    for i in xrange(h):
+        emit("ld r%s, x+", lo(i))
+    emit("adiw r28, %s", size - init_size) # move y to other end
+    for i in xrange(h):
+        emit("ld r%s, y+", hi(i))
+
+    emit("adiw r30, %s", size - init_size) # move z
+
+    if init_size == 1:
+        emit("mul %s, %s", lo(0), hi(0))
+        emit("st z+, r0")
+        emit("st z+, r1")
+    else:
+        #### first one
+        print ""
+        emit("ldi r23, 0")
+        emit("mul %s, %s", lo(0), hi(0))
+        emit("st z+, r0")
+        emit("mov r22, r1")
+        print ""
+
+        #### rest of initial block, with moving accumulator registers
+        acc = [22, 23, 24]
+        for r in xrange(1, h):
+            emit("ldi r%s, 0", acc[2])
+            for i in xrange(0, (r+2)//2):
+                emit("mul r%s, r%s", lo(i), hi(r - i))
+                emit("add r%s, r0", acc[0])
+                emit("adc r%s, r1", acc[1])
+                emit("adc r%s, %s", acc[2], zero)
+            emit("st z+, r%s", acc[0])
+            print ""
+            acc = acc[1:] + acc[:1]
+        
+        lo_r = range(2, 2 + h)
+        hi_r = range(12, 12 + h)
+        
+        # now we need to start loading more from the high end
+        for r in xrange(h, init_size):
+            hi_r = hi_r[1:] + hi_r[:1]
+            emit("ld r%s, y+", hi_r[h-1])
+            
+            emit("ldi r%s, 0", acc[2])
+            for i in xrange(0, (r+2)//2):
+                emit("mul r%s, r%s", lo(i), hi_r[h - 1 - i])
+                emit("add r%s, r0", acc[0])
+                emit("adc r%s, r1", acc[1])
+                emit("adc r%s, %s", acc[2], zero)
+            emit("st z+, r%s", acc[0])
+            print ""
+            acc = acc[1:] + acc[:1]
+            
+        # loaded all of the high end bytes; now need to start loading the rest of the low end
+        for r in xrange(1, init_size-h):
+            lo_r = lo_r[1:] + lo_r[:1]
+            emit("ld r%s, x+", lo_r[h-1])
+            
+            emit("ldi r%s, 0", acc[2])
+            for i in xrange(0, (init_size+1 - r)//2):
+                emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i])
+                emit("add r%s, r0", acc[0])
+                emit("adc r%s, r1", acc[1])
+                emit("adc r%s, %s", acc[2], zero)
+            emit("st z+, r%s", acc[0])
+            print ""
+            acc = acc[1:] + acc[:1]
+        
+        lo_r = lo_r[1:] + lo_r[:1]
+        emit("ld r%s, x+", lo_r[h-1])
+        
+        # now we have loaded everything, and we just need to finish the last corner
+        for r in xrange(init_size-h, init_size-1):
+            emit("ldi r%s, 0", acc[2])
+            for i in xrange(0, (init_size+1 - r)//2):
+                emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i])
+                emit("add r%s, r0", acc[0])
+                emit("adc r%s, r1", acc[1])
+                emit("adc r%s, %s", acc[2], zero)
+            emit("st z+, r%s", acc[0])
+            print ""
+            acc = acc[1:] + acc[:1]
+            lo_r = lo_r[1:] + lo_r[:1] # make the indexing easy
+        
+        emit("mul r%s, r%s", lo_r[0], hi_r[h - 1])
+        emit("add r%s, r0", acc[0])
+        emit("adc r%s, r1", acc[1])
+        emit("st z+, r%s", acc[0])
+        emit("st z+, r%s", acc[1])
+    print ""
+    emit("sbiw r26, %s", init_size) # reset x
+    emit("sbiw r30, %s", size + init_size) # reset z
+
+# TODO you could do more rows of size 20 here if your integers are larger than 40 bytes
+
+s = size - init_size
+
+for i in xrange(s):
+    emit("ld r%s, x+", rg(i))
+
+#### first few columns
+# NOTE: this is only valid if size >= 3
+print ""
+emit("ldi r23, 0")
+emit("mul r%s, r%s", rg(0), rg(0))
+emit("st z+, r0")
+emit("mov r22, r1")
+print ""
+emit("ldi r24, 0")
+emit("mul r%s, r%s", rg(0), rg(1))
+emit("add r22, r0")
+emit("adc r23, r1")
+emit("adc r24, %s", zero)
+emit("add r22, r0")
+emit("adc r23, r1")
+emit("adc r24, %s", zero)
+emit("st z+, r22")
+print ""
+emit("ldi r22, 0")
+emit("mul r%s, r%s", rg(0), rg(2))
+emit("add r23, r0")
+emit("adc r24, r1")
+emit("adc r22, %s", zero)
+emit("add r23, r0")
+emit("adc r24, r1")
+emit("adc r22, %s", zero)
+emit("mul r%s, r%s", rg(1), rg(1))
+emit("add r23, r0")
+emit("adc r24, r1")
+emit("adc r22, %s", zero)
+emit("st z+, r23")
+print ""
+
+acc = [23, 24, 22]
+old_acc = [28, 29]
+for i in xrange(3, s):
+    emit("ldi r%s, 0", old_acc[1])
+    tmp = [acc[1], acc[2]]
+    acc = [acc[0], old_acc[0], old_acc[1]]
+    old_acc = tmp
+    
+    # gather non-equal words
+    emit("mul r%s, r%s", rg(0), rg(i))
+    emit("mov r%s, r0", acc[0])
+    emit("mov r%s, r1", acc[1])
+    for j in xrange(1, (i+1)//2):
+        emit("mul r%s, r%s", rg(j), rg(i-j))
+        emit("add r%s, r0", acc[0])
+        emit("adc r%s, r1", acc[1])
+        emit("adc r%s, %s", acc[2], zero)
+    # multiply by 2
+    emit("lsl r%s", acc[0])
+    emit("rol r%s", acc[1])
+    emit("rol r%s", acc[2])
+    
+    # add equal word (if any)
+    if ((i+1) % 2) != 0:
+        emit("mul r%s, r%s", rg(i//2), rg(i//2))
+        emit("add r%s, r0", acc[0])
+        emit("adc r%s, r1", acc[1])
+        emit("adc r%s, %s", acc[2], zero)
+    
+    # add old accumulator
+    emit("add r%s, r%s", acc[0], old_acc[0])
+    emit("adc r%s, r%s", acc[1], old_acc[1])
+    emit("adc r%s, %s", acc[2], zero)
+    
+    # store
+    emit("st z+, r%s", acc[0])
+    print ""
+
+regs = range(2, 22)
+for i in xrange(init_size):
+    regs = regs[1:] + regs[:1]
+    emit("ld r%s, x+", regs[19])
+    
+    for limit in [18, 19]:
+        emit("ldi r%s, 0", old_acc[1])
+        tmp = [acc[1], acc[2]]
+        acc = [acc[0], old_acc[0], old_acc[1]]
+        old_acc = tmp
+    
+        # gather non-equal words
+        emit("mul r%s, r%s", regs[0], regs[limit])
+        emit("mov r%s, r0", acc[0])
+        emit("mov r%s, r1", acc[1])
+        for j in xrange(1, (limit+1)//2):
+            emit("mul r%s, r%s", regs[j], regs[limit-j])
+            emit("add r%s, r0", acc[0])
+            emit("adc r%s, r1", acc[1])
+            emit("adc r%s, %s", acc[2], zero)
+    
+        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
+        emit("add r%s, r0", acc[0])
+        emit("adc r%s, r25", acc[1])
+        emit("adc r%s, r25", acc[2])
+    
+        # multiply by 2
+        emit("lsl r%s", acc[0])
+        emit("rol r%s", acc[1])
+        emit("rol r%s", acc[2])
+    
+        # add equal word
+        if limit == 18:
+            emit("mul r%s, r%s", regs[9], regs[9])
+            emit("add r%s, r0", acc[0])
+            emit("adc r%s, r1", acc[1])
+            emit("adc r%s, %s", acc[2], zero)
+    
+        # add old accumulator
+        emit("add r%s, r%s", acc[0], old_acc[0])
+        emit("adc r%s, r%s", acc[1], old_acc[1])
+        emit("adc r%s, %s", acc[2], zero)
+    
+        # store
+        emit("st z+, r%s", acc[0])
+        print ""
+
+for i in xrange(1, s-3):
+    emit("ldi r%s, 0", old_acc[1])
+    tmp = [acc[1], acc[2]]
+    acc = [acc[0], old_acc[0], old_acc[1]]
+    old_acc = tmp
+
+    # gather non-equal words
+    emit("mul r%s, r%s", regs[i], regs[s - 1])
+    emit("mov r%s, r0", acc[0])
+    emit("mov r%s, r1", acc[1])
+    for j in xrange(1, (s-i)//2):
+        emit("mul r%s, r%s", regs[i+j], regs[s - 1 - j])
+        emit("add r%s, r0", acc[0])
+        emit("adc r%s, r1", acc[1])
+        emit("adc r%s, %s", acc[2], zero)
+    # multiply by 2
+    emit("lsl r%s", acc[0])
+    emit("rol r%s", acc[1])
+    emit("rol r%s", acc[2])
+
+    # add equal word (if any)
+    if ((s-i) % 2) != 0:
+        emit("mul r%s, r%s", regs[i + (s-i)//2], regs[i + (s-i)//2])
+        emit("add r%s, r0", acc[0])
+        emit("adc r%s, r1", acc[1])
+        emit("adc r%s, %s", acc[2], zero)
+
+    # add old accumulator
+    emit("add r%s, r%s", acc[0], old_acc[0])
+    emit("adc r%s, r%s", acc[1], old_acc[1])
+    emit("adc r%s, %s", acc[2], zero)
+
+    # store
+    emit("st z+, r%s", acc[0])
+    print ""
+
+acc = acc[1:] + acc[:1]
+emit("ldi r%s, 0", acc[2])
+emit("mul r%s, r%s", regs[17], regs[19])
+emit("add r%s, r0", acc[0])
+emit("adc r%s, r1", acc[1])
+emit("adc r%s, %s", acc[2], zero)
+emit("add r%s, r0", acc[0])
+emit("adc r%s, r1", acc[1])
+emit("adc r%s, %s", acc[2], zero)
+emit("mul r%s, r%s", regs[18], regs[18])
+emit("add r%s, r0", acc[0])
+emit("adc r%s, r1", acc[1])
+emit("adc r%s, %s", acc[2], zero)
+emit("st z+, r%s", acc[0])
+print ""
+
+acc = acc[1:] + acc[:1]
+emit("ldi r%s, 0", acc[2])
+emit("mul r%s, r%s", regs[18], regs[19])
+emit("add r%s, r0", acc[0])
+emit("adc r%s, r1", acc[1])
+emit("adc r%s, %s", acc[2], zero)
+emit("add r%s, r0", acc[0])
+emit("adc r%s, r1", acc[1])
+emit("adc r%s, %s", acc[2], zero)
+emit("st z+, r%s", acc[0])
+print ""
+
+emit("mul r%s, r%s", regs[19], regs[19])
+emit("add r%s, r0", acc[1])
+emit("adc r%s, r1", acc[2])
+emit("st z+, r%s", acc[1])
+
+emit("st z+, r%s", acc[2])
+emit("eor r1, r1")
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/emk_rules.py
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/emk_rules.py
@@ -0,0 +1,4 @@
+c, link = emk.module("c", "link")
+link.depdirs += [
+    "$:proj:$"
+]
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_compress.c
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_compress.c
@@ -0,0 +1,79 @@
+/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#include "uECC.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#ifndef uECC_TEST_NUMBER_OF_ITERATIONS
+#define uECC_TEST_NUMBER_OF_ITERATIONS   256
+#endif
+
+void vli_print(const char *str, const uint8_t *vli, unsigned int size) {
+    printf("%s ", str); /* label first, then each byte as two upper-case hex digits */
+    for (unsigned int i = 0; i < size; ++i) {
+        printf("%02X ", (unsigned)vli[i]);
+    }
+    printf("\n");
+}
+
+/* Round-trips random public keys through uECC_compress()/uECC_decompress() on every enabled curve. */
+int main() {
+    uint8_t public[64];
+    uint8_t private[32];
+    uint8_t compressed_point[33];
+    uint8_t decompressed_point[64];
+    int i, c;
+    int failures = 0; /* failed iterations; any failure turns the exit status into 1 */
+
+    const struct uECC_Curve_t * curves[5];
+    int num_curves = 0;
+#if uECC_SUPPORTS_secp160r1
+    curves[num_curves++] = uECC_secp160r1();
+#endif
+#if uECC_SUPPORTS_secp192r1
+    curves[num_curves++] = uECC_secp192r1();
+#endif
+#if uECC_SUPPORTS_secp224r1
+    curves[num_curves++] = uECC_secp224r1();
+#endif
+#if uECC_SUPPORTS_secp256r1
+    curves[num_curves++] = uECC_secp256r1();
+#endif
+#if uECC_SUPPORTS_secp256k1
+    curves[num_curves++] = uECC_secp256k1();
+#endif
+
+    printf("Testing compression and decompression of %d random EC points\n",
+           uECC_TEST_NUMBER_OF_ITERATIONS);
+
+    for (c = 0; c < num_curves; ++c) {
+        for (i = 0; i < uECC_TEST_NUMBER_OF_ITERATIONS; ++i) {
+            printf(".");
+            fflush(stdout);
+            memset(public, 0, sizeof(public));
+            memset(compressed_point, 0, sizeof(compressed_point)); /* the failure dump prints all 33 bytes */
+            memset(decompressed_point, 0, sizeof(decompressed_point));
+            /* Generate arbitrary EC point (public) on Curve */
+            if (!uECC_make_key(public, private, curves[c])) {
+                printf("uECC_make_key() failed\n");
+                ++failures;
+                continue;
+            }
+
+            /* compress and decompress point */
+            uECC_compress(public, compressed_point, curves[c]);
+            uECC_decompress(compressed_point, decompressed_point, curves[c]);
+
+            if (memcmp(public, decompressed_point, sizeof(public)) != 0) {
+                printf("Original and decompressed points are not identical!\n");
+                vli_print("Original point =     ", public, sizeof(public));
+                vli_print("Compressed point =   ", compressed_point, sizeof(compressed_point));
+                vli_print("Decompressed point = ", decompressed_point, sizeof(decompressed_point));
+                ++failures;
+            }
+        }
+        printf("\n");
+    }
+    return failures ? 1 : 0;
+}
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_compute.c
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_compute.c
@@ -0,0 +1,81 @@
+/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#include "uECC.h"
+
+#include <stdio.h>
+#include <string.h>
+
+void vli_print(const char *str, const uint8_t *vli, unsigned int size) {
+    printf("%s ", str); /* label first, then each byte as two upper-case hex digits */
+    for (unsigned int i = 0; i < size; ++i) {
+        printf("%02X ", (unsigned)vli[i]);
+    }
+    printf("\n");
+}
+
+/* Checks that uECC_compute_public_key() reproduces the key from uECC_make_key() and rejects a zero key. */
+int main() {
+    int i, c;
+    int success;
+    int failures = 0; /* failed checks; any failure turns the exit status into 1 */
+    uint8_t private[32];
+    uint8_t public[64];
+    uint8_t public_computed[64];
+
+    const struct uECC_Curve_t * curves[5];
+    int num_curves = 0;
+#if uECC_SUPPORTS_secp160r1
+    curves[num_curves++] = uECC_secp160r1();
+#endif
+#if uECC_SUPPORTS_secp192r1
+    curves[num_curves++] = uECC_secp192r1();
+#endif
+#if uECC_SUPPORTS_secp224r1
+    curves[num_curves++] = uECC_secp224r1();
+#endif
+#if uECC_SUPPORTS_secp256r1
+    curves[num_curves++] = uECC_secp256r1();
+#endif
+#if uECC_SUPPORTS_secp256k1
+    curves[num_curves++] = uECC_secp256k1();
+#endif
+
+    printf("Testing 256 random private key pairs\n");
+    for (c = 0; c < num_curves; ++c) {
+        for (i = 0; i < 256; ++i) {
+            printf(".");
+            fflush(stdout);
+            memset(public, 0, sizeof(public));
+            memset(public_computed, 0, sizeof(public_computed));
+
+            if (!uECC_make_key(public, private, curves[c])) {
+                printf("uECC_make_key() failed\n");
+                ++failures;
+                continue;
+            }
+
+            if (!uECC_compute_public_key(private, public_computed, curves[c])) {
+                printf("uECC_compute_public_key() failed\n");
+                ++failures;
+            }
+            if (memcmp(public, public_computed, sizeof(public)) != 0) {
+                printf("Computed and provided public keys are not identical!\n");
+                vli_print("Computed public key = ", public_computed, sizeof(public_computed));
+                vli_print("Provided public key = ", public, sizeof(public));
+                vli_print("Private key = ", private, sizeof(private));
+                ++failures;
+            }
+        }
+        printf("\n");
+        printf("Testing private key = 0\n");
+        memset(private, 0, sizeof(private));
+        success = uECC_compute_public_key(private, public_computed, curves[c]);
+        if (success) {
+            printf("uECC_compute_public_key() should have failed\n");
+            ++failures;
+        }
+        printf("\n");
+    }
+
+    return failures ? 1 : 0;
+}
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdh.c
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdh.c
@@ -0,0 +1,90 @@
+/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#include "uECC.h"
+
+#include <stdio.h>
+#include <string.h>
+
+void vli_print(const uint8_t *vli, unsigned int size) { /* hex dump without label or newline */
+    for (unsigned int i = 0; i < size; ++i) {
+        printf("%02X ", (unsigned)vli[i]);
+    }
+}
+
+int main() { /* ECDH agreement check on every enabled curve; returns 1 on the first failure */
+    int i, c;
+    uint8_t private1[32] = {0};
+    uint8_t private2[32] = {0};
+    uint8_t public1[64] = {0};
+    uint8_t public2[64] = {0};
+    uint8_t secret1[32] = {0};
+    uint8_t secret2[32] = {0};
+
+    const struct uECC_Curve_t * curves[5];
+    int num_curves = 0;
+#if uECC_SUPPORTS_secp160r1
+    curves[num_curves++] = uECC_secp160r1();
+#endif
+#if uECC_SUPPORTS_secp192r1
+    curves[num_curves++] = uECC_secp192r1();
+#endif
+#if uECC_SUPPORTS_secp224r1
+    curves[num_curves++] = uECC_secp224r1();
+#endif
+#if uECC_SUPPORTS_secp256r1
+    curves[num_curves++] = uECC_secp256r1();
+#endif
+#if uECC_SUPPORTS_secp256k1
+    curves[num_curves++] = uECC_secp256k1();
+#endif
+
+    printf("Testing 256 random private key pairs\n");
+    for (c = 0; c < num_curves; ++c) {
+        for (i = 0; i < 256; ++i) {
+            printf(".");
+            fflush(stdout);
+
+            if (!uECC_make_key(public1, private1, curves[c]) ||
+                !uECC_make_key(public2, private2, curves[c])) {
+                printf("uECC_make_key() failed\n");
+                return 1;
+            }
+
+            if (!uECC_shared_secret(public2, private1, secret1, curves[c])) {
+                printf("shared_secret() failed (1)\n");
+                return 1;
+            }
+
+            if (!uECC_shared_secret(public1, private2, secret2, curves[c])) {
+                printf("shared_secret() failed (2)\n");
+                return 1;
+            }
+
+            if (memcmp(secret1, secret2, sizeof(secret1)) != 0) {
+                printf("Shared secrets are not identical!\n");
+                printf("Private key 1 = ");
+                vli_print(private1, 32);
+                printf("\n");
+                printf("Private key 2 = ");
+                vli_print(private2, 32);
+                printf("\n");
+                printf("Public key 1 = ");
+                vli_print(public1, 64);
+                printf("\n");
+                printf("Public key 2 = ");
+                vli_print(public2, 64);
+                printf("\n");
+                printf("Shared secret 1 = ");
+                vli_print(secret1, 32);
+                printf("\n");
+                printf("Shared secret 2 = ");
+                vli_print(secret2, 32);
+                printf("\n");
+                return 1; /* a mismatch is a test failure, like the other error paths above */
+            }
+        }
+        printf("\n");
+    }
+
+    return 0;
+}
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdsa.c
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdsa.c
@@ -0,0 +1,59 @@
+/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#include "uECC.h"
+
+#include <stdio.h>
+#include <string.h>
+
+int main() { /* sign/verify round trip on every enabled curve; returns 1 at the first failure */
+    uint8_t private[32] = {0};
+    uint8_t public[64] = {0};
+    uint8_t hash[32] = {0};
+    uint8_t sig[64] = {0};
+    int curve_idx, round;
+
+    const struct uECC_Curve_t * curves[5];
+    int num_curves = 0;
+#if uECC_SUPPORTS_secp160r1
+    curves[num_curves++] = uECC_secp160r1();
+#endif
+#if uECC_SUPPORTS_secp192r1
+    curves[num_curves++] = uECC_secp192r1();
+#endif
+#if uECC_SUPPORTS_secp224r1
+    curves[num_curves++] = uECC_secp224r1();
+#endif
+#if uECC_SUPPORTS_secp256r1
+    curves[num_curves++] = uECC_secp256r1();
+#endif
+#if uECC_SUPPORTS_secp256k1
+    curves[num_curves++] = uECC_secp256k1();
+#endif
+
+    printf("Testing 256 signatures\n");
+    for (curve_idx = 0; curve_idx < num_curves; ++curve_idx) {
+        const struct uECC_Curve_t *curve = curves[curve_idx];
+        for (round = 0; round < 256; ++round) {
+            const char *failure = NULL; /* message of the first step that failed, if any */
+            printf(".");
+            fflush(stdout);
+            if (!uECC_make_key(public, private, curve)) {
+                failure = "uECC_make_key() failed\n";
+            } else {
+                memcpy(hash, public, sizeof(hash)); /* the fresh public key doubles as the message hash */
+                if (!uECC_sign(private, hash, sizeof(hash), sig, curve)) {
+                    failure = "uECC_sign() failed\n";
+                } else if (!uECC_verify(public, hash, sizeof(hash), sig, curve)) {
+                    failure = "uECC_verify() failed\n";
+                }
+            }
+            if (failure) {
+                printf("%s", failure);
+                return 1;
+            }
+        }
+        printf("\n");
+    }
+
+    return 0;
+}
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdsa_deterministic.c.example
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdsa_deterministic.c.example
@@ -0,0 +1,93 @@
+/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#include "uECC.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#define SHA256_BLOCK_LENGTH  64
+#define SHA256_DIGEST_LENGTH 32
+
+typedef struct SHA256_CTX {
+	uint32_t	state[8];
+	uint64_t	bitcount;
+	uint8_t	buffer[SHA256_BLOCK_LENGTH];
+} SHA256_CTX;
+
+extern void SHA256_Init(SHA256_CTX *ctx);
+extern void SHA256_Update(SHA256_CTX *ctx, const uint8_t *message, size_t message_size);
+extern void SHA256_Final(uint8_t digest[SHA256_DIGEST_LENGTH], SHA256_CTX *ctx);
+
+typedef struct SHA256_HashContext {
+    uECC_HashContext uECC; /* Must stay the first member: the callbacks cast the base pointer back to SHA256_HashContext. */
+    SHA256_CTX ctx;        /* SHA-256 state passed to SHA256_Init/SHA256_Update/SHA256_Final. */
+} SHA256_HashContext;
+
+/* uECC init_hash callback: (re)initialise the SHA-256 state of the enclosing SHA256_HashContext. */
+static void init_SHA256(const uECC_HashContext *base) {
+    SHA256_Init(&((SHA256_HashContext *)base)->ctx);
+}
+
+/* uECC update_hash callback: feed message_size bytes of message into the SHA-256 state. */
+static void update_SHA256(const uECC_HashContext *base,
+                          const uint8_t *message,
+                          unsigned message_size) {
+    SHA256_Update(&((SHA256_HashContext *)base)->ctx, message, message_size);
+}
+
+/* uECC finish_hash callback: write the final digest of the SHA-256 state to hash_result. */
+static void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) {
+    SHA256_Final(hash_result, &((SHA256_HashContext *)base)->ctx);
+}
+
+int main() {
+    /* Round trip on every curve: make a key pair, sign deterministically, then verify. */
+    int i, c;
+    uint8_t private[32] = {0};
+    uint8_t public[64] = {0};
+    uint8_t hash[32] = {0};
+    uint8_t sig[64] = {0};
+    /* Scratch buffer required by uECC_HashContext: 2 * result_size + block_size bytes. */
+    uint8_t tmp[2 * SHA256_DIGEST_LENGTH + SHA256_BLOCK_LENGTH];
+    SHA256_HashContext ctx = {{
+        &init_SHA256,
+        &update_SHA256,
+        &finish_SHA256,
+        SHA256_BLOCK_LENGTH,
+        SHA256_DIGEST_LENGTH,
+        tmp
+    }};
+
+    const struct uECC_Curve_t * curves[5];
+    curves[0] = uECC_secp160r1();
+    curves[1] = uECC_secp192r1();
+    curves[2] = uECC_secp224r1();
+    curves[3] = uECC_secp256r1();
+    curves[4] = uECC_secp256k1();
+
+    printf("Testing 256 signatures\n");
+    for (c = 0; c < (int)(sizeof(curves) / sizeof(curves[0])); ++c) {
+        for (i = 0; i < 256; ++i) {
+            printf(".");
+            fflush(stdout);
+
+            if (!uECC_make_key(public, private, curves[c])) {
+                printf("uECC_make_key() failed\n");
+                return 1;
+            }
+            /* The leading bytes of the new public key serve as the message hash to sign. */
+            memcpy(hash, public, sizeof(hash));
+            if (!uECC_sign_deterministic(private, hash, sizeof(hash), &ctx.uECC, sig, curves[c])) {
+                printf("uECC_sign_deterministic() failed\n");
+                return 1;
+            }
+            if (!uECC_verify(public, hash, sizeof(hash), sig, curves[c])) {
+                printf("uECC_verify() failed\n");
+                return 1;
+            }
+        }
+        printf("\n");
+    }
+
+    return 0;
+}
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/types.h
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/types.h
@@ -0,0 +1,108 @@
+/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#ifndef _UECC_TYPES_H_
+#define _UECC_TYPES_H_
+
+#ifndef uECC_PLATFORM
+    #if __AVR__
+        #define uECC_PLATFORM uECC_avr
+    #elif defined(__thumb2__) || defined(_M_ARMT) /* I think MSVC only supports Thumb-2 targets */
+        #define uECC_PLATFORM uECC_arm_thumb2
+    #elif defined(__thumb__)
+        #define uECC_PLATFORM uECC_arm_thumb
+    #elif defined(__arm__) || defined(_M_ARM)
+        #define uECC_PLATFORM uECC_arm
+    #elif defined(__aarch64__)
+        #define uECC_PLATFORM uECC_arm64
+    #elif defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__I86__)
+        #define uECC_PLATFORM uECC_x86
+    #elif defined(__amd64__) || defined(_M_X64)
+        #define uECC_PLATFORM uECC_x86_64
+    #else
+        #define uECC_PLATFORM uECC_arch_other
+    #endif
+#endif
+
+#ifndef uECC_ARM_USE_UMAAL
+    #if (uECC_PLATFORM == uECC_arm) && (__ARM_ARCH >= 6)
+        #define uECC_ARM_USE_UMAAL 1
+    #elif (uECC_PLATFORM == uECC_arm_thumb2) && (__ARM_ARCH >= 6) && !__ARM_ARCH_7M__
+        #define uECC_ARM_USE_UMAAL 1
+    #else
+        #define uECC_ARM_USE_UMAAL 0
+    #endif
+#endif
+
+#ifndef uECC_WORD_SIZE
+    #if uECC_PLATFORM == uECC_avr
+        #define uECC_WORD_SIZE 1
+    #elif (uECC_PLATFORM == uECC_x86_64 || uECC_PLATFORM == uECC_arm64)
+        #define uECC_WORD_SIZE 8
+    #else
+        #define uECC_WORD_SIZE 4
+    #endif
+#endif
+
+#if (uECC_WORD_SIZE != 1) && (uECC_WORD_SIZE != 4) && (uECC_WORD_SIZE != 8)
+    #error "Unsupported value for uECC_WORD_SIZE"
+#endif
+
+#if ((uECC_PLATFORM == uECC_avr) && (uECC_WORD_SIZE != 1))
+    #pragma message ("uECC_WORD_SIZE must be 1 for AVR")
+    #undef uECC_WORD_SIZE
+    #define uECC_WORD_SIZE 1
+#endif
+
+#if ((uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \
+        uECC_PLATFORM ==  uECC_arm_thumb2) && \
+     (uECC_WORD_SIZE != 4))
+    #pragma message ("uECC_WORD_SIZE must be 4 for ARM")
+    #undef uECC_WORD_SIZE
+    #define uECC_WORD_SIZE 4
+#endif
+
+#if defined(__SIZEOF_INT128__) || ((__clang_major__ * 100 + __clang_minor__) >= 302)
+    #define SUPPORTS_INT128 1
+#else
+    #define SUPPORTS_INT128 0
+#endif
+
+typedef int8_t wordcount_t;  /* Number of words in a VLI (e.g. the num_words arguments). */
+typedef int16_t bitcount_t;  /* Bit index or bit count within a VLI. */
+typedef int8_t cmpresult_t;  /* Return type of uECC_vli_cmp(). */
+
+#if (uECC_WORD_SIZE == 1)
+
+typedef uint8_t uECC_word_t;
+typedef uint16_t uECC_dword_t;
+
+#define HIGH_BIT_SET 0x80
+#define uECC_WORD_BITS 8
+#define uECC_WORD_BITS_SHIFT 3
+#define uECC_WORD_BITS_MASK 0x07
+
+#elif (uECC_WORD_SIZE == 4)
+
+typedef uint32_t uECC_word_t;
+typedef uint64_t uECC_dword_t;
+
+#define HIGH_BIT_SET 0x80000000
+#define uECC_WORD_BITS 32
+#define uECC_WORD_BITS_SHIFT 5
+#define uECC_WORD_BITS_MASK 0x01F
+
+#elif (uECC_WORD_SIZE == 8)
+
+typedef uint64_t uECC_word_t;
+#if SUPPORTS_INT128
+typedef unsigned __int128 uECC_dword_t;
+#endif
+
+#define HIGH_BIT_SET 0x8000000000000000ull
+#define uECC_WORD_BITS 64
+#define uECC_WORD_BITS_SHIFT 6
+#define uECC_WORD_BITS_MASK 0x03F
+
+#endif /* uECC_WORD_SIZE */
+
+#endif /* _UECC_TYPES_H_ */
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC.c
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC.c
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC.h
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC.h
@@ -0,0 +1,365 @@
+/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#ifndef _UECC_H_
+#define _UECC_H_
+
+#include <stdint.h>
+
+/* Platform selection options.
+If uECC_PLATFORM is not defined, the code will try to guess it based on compiler macros.
+Possible values for uECC_PLATFORM are defined below: */
+#define uECC_arch_other 0
+#define uECC_x86        1
+#define uECC_x86_64     2
+#define uECC_arm        3
+#define uECC_arm_thumb  4
+#define uECC_arm_thumb2 5
+#define uECC_arm64      6
+#define uECC_avr        7
+
+/* If desired, you can define uECC_WORD_SIZE as appropriate for your platform (1, 4, or 8 bytes).
+If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your
+platform. */
+
+/* Optimization level; trade speed for code size.
+   Larger values produce code that is faster but larger.
+   Currently supported values are 0 - 4; 0 is unusably slow for most applications.
+   Optimization level 4 currently only has an effect on ARM platforms where more than one
+   curve is enabled. */
+#ifndef uECC_OPTIMIZATION_LEVEL
+    #define uECC_OPTIMIZATION_LEVEL 2
+#endif
+
+/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be
+used for (scalar) squaring instead of the generic multiplication function. This can make things
+somewhat faster, but increases the code size. */
+#ifndef uECC_SQUARE_FUNC
+    #define uECC_SQUARE_FUNC 0
+#endif
+
+/* uECC_VLI_NATIVE_LITTLE_ENDIAN - If enabled (defined as nonzero), this will switch to native
+little-endian format for *all* arrays passed in and out of the public API. This includes public 
+and private keys, shared secrets, signatures and message hashes. 
+Using this switch reduces the amount of call stack memory used by uECC, since fewer intermediate
+translations are required. 
+Note that this will *only* work on native little-endian processors and it will treat the uint8_t
+arrays passed into the public API as word arrays, therefore requiring the provided byte arrays 
+to be word aligned on architectures that do not support unaligned accesses.
+IMPORTANT: Keys and signatures generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=1 are incompatible
+with keys and signatures generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=0; all parties must use
+the same endianness. */
+#ifndef uECC_VLI_NATIVE_LITTLE_ENDIAN
+    #define uECC_VLI_NATIVE_LITTLE_ENDIAN 0
+#endif
+
+/* Curve support selection. Set to 0 to remove that curve. */
+#ifndef uECC_SUPPORTS_secp160r1
+    #define uECC_SUPPORTS_secp160r1 1
+#endif
+#ifndef uECC_SUPPORTS_secp192r1
+    #define uECC_SUPPORTS_secp192r1 1
+#endif
+#ifndef uECC_SUPPORTS_secp224r1
+    #define uECC_SUPPORTS_secp224r1 1
+#endif
+#ifndef uECC_SUPPORTS_secp256r1
+    #define uECC_SUPPORTS_secp256r1 1
+#endif
+#ifndef uECC_SUPPORTS_secp256k1
+    #define uECC_SUPPORTS_secp256k1 1
+#endif
+
+/* Specifies whether compressed point format is supported.
+   Set to 0 to disable point compression/decompression functions. */
+#ifndef uECC_SUPPORT_COMPRESSED_POINT
+    #define uECC_SUPPORT_COMPRESSED_POINT 1
+#endif
+
+struct uECC_Curve_t;
+typedef const struct uECC_Curve_t * uECC_Curve;
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#if uECC_SUPPORTS_secp160r1
+uECC_Curve uECC_secp160r1(void);
+#endif
+#if uECC_SUPPORTS_secp192r1
+uECC_Curve uECC_secp192r1(void);
+#endif
+#if uECC_SUPPORTS_secp224r1
+uECC_Curve uECC_secp224r1(void);
+#endif
+#if uECC_SUPPORTS_secp256r1
+uECC_Curve uECC_secp256r1(void);
+#endif
+#if uECC_SUPPORTS_secp256k1
+uECC_Curve uECC_secp256k1(void);
+#endif
+
+/* uECC_RNG_Function type
+The RNG function should fill 'size' random bytes into 'dest'. It should return 1 if
+'dest' was filled with random data, or 0 if the random data could not be generated.
+The filled-in values should be either truly random, or from a cryptographically-secure PRNG.
+
+A correctly functioning RNG function must be set (using uECC_set_rng()) before calling
+uECC_make_key() or uECC_sign().
+
+Setting a correctly functioning RNG function improves the resistance to side-channel attacks
+for uECC_shared_secret() and uECC_sign_deterministic().
+
+A correct RNG function is set by default when building for Windows, Linux, or OS X.
+If you are building on another POSIX-compliant system that supports /dev/random or /dev/urandom,
+you can define uECC_POSIX to use the predefined RNG. For embedded platforms there is no predefined
+RNG function; you must provide your own.
+*/
+typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size);
+
+/* uECC_set_rng() function.
+Set the function that will be used to generate random bytes. The RNG function should
+return 1 if the random data was generated, or 0 if the random data could not be generated.
+
+On platforms where there is no predefined RNG function (eg embedded platforms), this must
+be called before uECC_make_key() or uECC_sign() are used.
+
+Inputs:
+    rng_function - The function that will be used to generate random bytes.
+*/
+void uECC_set_rng(uECC_RNG_Function rng_function);
+
+/* uECC_get_rng() function.
+
+Returns the function that will be used to generate random bytes.
+*/
+uECC_RNG_Function uECC_get_rng(void);
+
+/* uECC_curve_private_key_size() function.
+
+Returns the size of a private key for the curve in bytes.
+*/
+int uECC_curve_private_key_size(uECC_Curve curve);
+
+/* uECC_curve_public_key_size() function.
+
+Returns the size of a public key for the curve in bytes.
+*/
+int uECC_curve_public_key_size(uECC_Curve curve);
+
+/* uECC_make_key() function.
+Create a public/private key pair.
+
+Outputs:
+    public_key  - Will be filled in with the public key. Must be at least 2 * the curve size
+                  (in bytes) long. For example, if the curve is secp256r1, public_key must be 64
+                  bytes long.
+    private_key - Will be filled in with the private key. Must be as long as the curve order; this
+                  is typically the same as the curve size, except for secp160r1. For example, if the
+                  curve is secp256r1, private_key must be 32 bytes long.
+
+                  For secp160r1, private_key must be 21 bytes long! Note that the first byte will
+                  almost always be 0 (there is about a 1 in 2^80 chance of it being non-zero).
+
+Returns 1 if the key pair was generated successfully, 0 if an error occurred.
+*/
+int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve);
+
+/* uECC_shared_secret() function.
+Compute a shared secret given your secret key and someone else's public key.
+Note: It is recommended that you hash the result of uECC_shared_secret() before using it for
+symmetric encryption or HMAC.
+
+Inputs:
+    public_key  - The public key of the remote party.
+    private_key - Your private key.
+
+Outputs:
+    secret - Will be filled in with the shared secret value. Must be the same size as the
+             curve size; for example, if the curve is secp256r1, secret must be 32 bytes long.
+
+Returns 1 if the shared secret was generated successfully, 0 if an error occurred.
+*/
+int uECC_shared_secret(const uint8_t *public_key,
+                       const uint8_t *private_key,
+                       uint8_t *secret,
+                       uECC_Curve curve);
+
+#if uECC_SUPPORT_COMPRESSED_POINT
+/* uECC_compress() function.
+Compress a public key.
+
+Inputs:
+    public_key - The public key to compress.
+
+Outputs:
+    compressed - Will be filled in with the compressed public key. Must be at least
+                 (curve size + 1) bytes long; for example, if the curve is secp256r1,
+                 compressed must be 33 bytes long.
+*/
+void uECC_compress(const uint8_t *public_key, uint8_t *compressed, uECC_Curve curve);
+
+/* uECC_decompress() function.
+Decompress a compressed public key.
+
+Inputs:
+    compressed - The compressed public key.
+
+Outputs:
+    public_key - Will be filled in with the decompressed public key.
+*/
+void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, uECC_Curve curve);
+#endif /* uECC_SUPPORT_COMPRESSED_POINT */
+
+/* uECC_valid_public_key() function.
+Check to see if a public key is valid.
+
+Note that you are not required to check for a valid public key before using any other uECC
+functions. However, you may wish to avoid spending CPU time computing a shared secret or
+verifying a signature using an invalid public key.
+
+Inputs:
+    public_key - The public key to check.
+
+Returns 1 if the public key is valid, 0 if it is invalid.
+*/
+int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve);
+
+/* uECC_compute_public_key() function.
+Compute the corresponding public key for a private key.
+
+Inputs:
+    private_key - The private key to compute the public key for
+
+Outputs:
+    public_key - Will be filled in with the corresponding public key
+
+Returns 1 if the key was computed successfully, 0 if an error occurred.
+*/
+int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, uECC_Curve curve);
+
+/* uECC_sign() function.
+Generate an ECDSA signature for a given hash value.
+
+Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it in to
+this function along with your private key.
+
+Inputs:
+    private_key  - Your private key.
+    message_hash - The hash of the message to sign.
+    hash_size    - The size of message_hash in bytes.
+
+Outputs:
+    signature - Will be filled in with the signature value. Must be at least 2 * curve size long.
+                For example, if the curve is secp256r1, signature must be 64 bytes long.
+
+Returns 1 if the signature generated successfully, 0 if an error occurred.
+*/
+int uECC_sign(const uint8_t *private_key,
+              const uint8_t *message_hash,
+              unsigned hash_size,
+              uint8_t *signature,
+              uECC_Curve curve);
+
+/* uECC_HashContext structure.
+This is used to pass in an arbitrary hash function to uECC_sign_deterministic().
+The structure will be used for multiple hash computations; each time a new hash
+is computed, init_hash() will be called, followed by one or more calls to
+update_hash(), and finally a call to finish_hash() to produce the resulting hash.
+
+The intention is that you will create a structure that includes uECC_HashContext
+followed by any hash-specific data. For example:
+
+typedef struct SHA256_HashContext {
+    uECC_HashContext uECC;
+    SHA256_CTX ctx;
+} SHA256_HashContext;
+
+void init_SHA256(const uECC_HashContext *base) {
+    SHA256_HashContext *context = (SHA256_HashContext *)base;
+    SHA256_Init(&context->ctx);
+}
+
+void update_SHA256(const uECC_HashContext *base,
+                   const uint8_t *message,
+                   unsigned message_size) {
+    SHA256_HashContext *context = (SHA256_HashContext *)base;
+    SHA256_Update(&context->ctx, message, message_size);
+}
+
+void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) {
+    SHA256_HashContext *context = (SHA256_HashContext *)base;
+    SHA256_Final(hash_result, &context->ctx);
+}
+
+... when signing ...
+{
+    uint8_t tmp[32 + 32 + 64];
+    SHA256_HashContext ctx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}};
+    uECC_sign_deterministic(key, message_hash, hash_size, &ctx.uECC, signature, curve);
+}
+*/
+typedef struct uECC_HashContext {
+    void (*init_hash)(const struct uECC_HashContext *context);
+    void (*update_hash)(const struct uECC_HashContext *context,
+                        const uint8_t *message,
+                        unsigned message_size);
+    void (*finish_hash)(const struct uECC_HashContext *context, uint8_t *hash_result);
+    unsigned block_size; /* Hash function block size in bytes, eg 64 for SHA-256. */
+    unsigned result_size; /* Hash function result size in bytes, eg 32 for SHA-256. */
+    uint8_t *tmp; /* Must point to a buffer of at least (2 * result_size + block_size) bytes. */
+} uECC_HashContext;
+
+/* uECC_sign_deterministic() function.
+Generate an ECDSA signature for a given hash value, using a deterministic algorithm
+(see RFC 6979). You do not need to set the RNG using uECC_set_rng() before calling
+this function; however, if the RNG is defined it will improve resistance to side-channel
+attacks.
+
+Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it to
+this function along with your private key and a hash context. Note that the message_hash
+does not need to be computed with the same hash function used by hash_context.
+
+Inputs:
+    private_key  - Your private key.
+    message_hash - The hash of the message to sign.
+    hash_size    - The size of message_hash in bytes.
+    hash_context - A hash context to use.
+
+Outputs:
+    signature - Will be filled in with the signature value.
+
+Returns 1 if the signature generated successfully, 0 if an error occurred.
+*/
+int uECC_sign_deterministic(const uint8_t *private_key,
+                            const uint8_t *message_hash,
+                            unsigned hash_size,
+                            const uECC_HashContext *hash_context,
+                            uint8_t *signature,
+                            uECC_Curve curve);
+
+/* uECC_verify() function.
+Verify an ECDSA signature.
+
+Usage: Compute the hash of the signed data using the same hash as the signer and
+pass it to this function along with the signer's public key and the signature values (r and s).
+
+Inputs:
+    public_key   - The signer's public key.
+    message_hash - The hash of the signed data.
+    hash_size    - The size of message_hash in bytes.
+    signature    - The signature value.
+
+Returns 1 if the signature is valid, 0 if it is invalid.
+*/
+int uECC_verify(const uint8_t *public_key,
+                const uint8_t *message_hash,
+                unsigned hash_size,
+                const uint8_t *signature,
+                uECC_Curve curve);
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* _UECC_H_ */
--- a/components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC_vli.h
+++ b/components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC_vli.h
@@ -0,0 +1,172 @@
+/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+#ifndef _UECC_VLI_H_
+#define _UECC_VLI_H_
+
+#include "uECC.h"
+#include "types.h"
+
+/* Functions for raw large-integer manipulation. These are only available
+   if uECC.c is compiled with uECC_ENABLE_VLI_API defined to 1. */
+#ifndef uECC_ENABLE_VLI_API
+    #define uECC_ENABLE_VLI_API 0
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#if uECC_ENABLE_VLI_API
+
+void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words);
+
+/* Constant-time comparison to zero - secure way to compare long integers */
+/* Returns 1 if vli == 0, 0 otherwise. */
+uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words);
+
+/* Returns nonzero if bit 'bit' of vli is set. */
+uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit);
+
+/* Counts the number of bits required to represent vli. */
+bitcount_t uECC_vli_numBits(const uECC_word_t *vli, const wordcount_t max_words);
+
+/* Sets dest = src. */
+void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words);
+
+/* Constant-time comparison function - secure way to compare long integers */
+/* Returns one if left == right, zero otherwise */
+uECC_word_t uECC_vli_equal(const uECC_word_t *left,
+                           const uECC_word_t *right,
+                           wordcount_t num_words);
+
+/* Constant-time comparison function - secure way to compare long integers */
+/* Returns sign of left - right, in constant time. */
+cmpresult_t uECC_vli_cmp(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words);
+
+/* Computes vli = vli >> 1. */
+void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words);
+
+/* Computes result = left + right, returning carry. Can modify in place. */
+uECC_word_t uECC_vli_add(uECC_word_t *result,
+                         const uECC_word_t *left,
+                         const uECC_word_t *right,
+                         wordcount_t num_words);
+
+/* Computes result = left - right, returning borrow. Can modify in place. */
+uECC_word_t uECC_vli_sub(uECC_word_t *result,
+                         const uECC_word_t *left,
+                         const uECC_word_t *right,
+                         wordcount_t num_words);
+
+/* Computes result = left * right. Result must be 2 * num_words long. */
+void uECC_vli_mult(uECC_word_t *result,
+                   const uECC_word_t *left,
+                   const uECC_word_t *right,
+                   wordcount_t num_words);
+
+/* Computes result = left^2. Result must be 2 * num_words long. */
+void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, wordcount_t num_words);
+
+/* Computes result = (left + right) % mod.
+   Assumes that left < mod and right < mod, and that result does not overlap mod. */
+void uECC_vli_modAdd(uECC_word_t *result,
+                     const uECC_word_t *left,
+                     const uECC_word_t *right,
+                     const uECC_word_t *mod,
+                     wordcount_t num_words);
+
+/* Computes result = (left - right) % mod.
+   Assumes that left < mod and right < mod, and that result does not overlap mod. */
+void uECC_vli_modSub(uECC_word_t *result,
+                     const uECC_word_t *left,
+                     const uECC_word_t *right,
+                     const uECC_word_t *mod,
+                     wordcount_t num_words);
+
+/* Computes result = product % mod, where product is 2N words long.
+   Currently only designed to work for mod == curve->p or curve_n. */
+void uECC_vli_mmod(uECC_word_t *result,
+                   uECC_word_t *product,
+                   const uECC_word_t *mod,
+                   wordcount_t num_words);
+
+/* Calculates result = product (mod curve->p), where product is up to
+   2 * curve->num_words long. */
+void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, uECC_Curve curve);
+
+/* Computes result = (left * right) % mod.
+   Currently only designed to work for mod == curve->p or curve_n. */
+void uECC_vli_modMult(uECC_word_t *result,
+                      const uECC_word_t *left,
+                      const uECC_word_t *right,
+                      const uECC_word_t *mod,
+                      wordcount_t num_words);
+
+/* Computes result = (left * right) % curve->p. */
+void uECC_vli_modMult_fast(uECC_word_t *result,
+                           const uECC_word_t *left,
+                           const uECC_word_t *right,
+                           uECC_Curve curve);
+
+/* Computes result = left^2 % mod.
+   Currently only designed to work for mod == curve->p or curve_n. */
+void uECC_vli_modSquare(uECC_word_t *result,
+                        const uECC_word_t *left,
+                        const uECC_word_t *mod,
+                        wordcount_t num_words);
+
+/* Computes result = left^2 % curve->p. */
+void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, uECC_Curve curve);
+
+/* Computes result = (1 / input) % mod.*/
+void uECC_vli_modInv(uECC_word_t *result,
+                     const uECC_word_t *input,
+                     const uECC_word_t *mod,
+                     wordcount_t num_words);
+
+#if uECC_SUPPORT_COMPRESSED_POINT
+/* Calculates a = sqrt(a) (mod curve->p) */
+void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve);
+#endif
+
+/* Converts an integer in uECC native format to big-endian bytes. */
+void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, const uECC_word_t *native);
+/* Converts big-endian bytes to an integer in uECC native format. */
+void uECC_vli_bytesToNative(uECC_word_t *native, const uint8_t *bytes, int num_bytes);
+
+unsigned uECC_curve_num_words(uECC_Curve curve);
+unsigned uECC_curve_num_bytes(uECC_Curve curve);
+unsigned uECC_curve_num_bits(uECC_Curve curve);
+unsigned uECC_curve_num_n_words(uECC_Curve curve);
+unsigned uECC_curve_num_n_bytes(uECC_Curve curve);
+unsigned uECC_curve_num_n_bits(uECC_Curve curve);
+
+const uECC_word_t *uECC_curve_p(uECC_Curve curve);
+const uECC_word_t *uECC_curve_n(uECC_Curve curve);
+const uECC_word_t *uECC_curve_G(uECC_Curve curve);
+const uECC_word_t *uECC_curve_b(uECC_Curve curve);
+
+int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve);
+
+/* Multiplies a point by a scalar. Points are represented by the X coordinate followed by
+   the Y coordinate in the same array, both coordinates are curve->num_words long. Note
+   that scalar must be curve->num_n_words long (NOT curve->num_words). */
+void uECC_point_mult(uECC_word_t *result,
+                     const uECC_word_t *point,
+                     const uECC_word_t *scalar,
+                     uECC_Curve curve);
+
+/* Generates a random integer in the range 0 < random < top.
+   Both random and top have num_words words. */
+int uECC_generate_random_int(uECC_word_t *random,
+                             const uECC_word_t *top,
+                             wordcount_t num_words);
+
+#endif /* uECC_ENABLE_VLI_API */
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* _UECC_VLI_H_ */
--- a/components/bootloader/subproject/main/CMakeLists.txt
+++ b/components/bootloader/subproject/main/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Bootloader "main" component: the entry point source plus the bootloader linker scripts.
+idf_component_register(SRCS "bootloader_start.c" REQUIRES bootloader bootloader_support)
+
+# Linker script names are prefixed with the chip target (e.g. esp32.bootloader.ld).
+idf_build_get_property(idf_target IDF_TARGET)
+set(bootloader_ld_scripts "${idf_target}.bootloader.ld" "${idf_target}.bootloader.rom.ld")
+
+target_linker_script(${COMPONENT_LIB} INTERFACE "${bootloader_ld_scripts}")
--- a/components/bootloader/subproject/main/Makefile.projbuild
+++ b/components/bootloader/subproject/main/Makefile.projbuild
@@ -0,0 +1,4 @@
+# Submodules normally added in component.mk, but fully qualified
+# paths can be added at this level (we need binary librtc to be
+# available to link bootloader).
+COMPONENT_SUBMODULES += $(IDF_PATH)/components/esp_wifi/lib_esp32
--- a/components/bootloader/subproject/main/bootloader_start.c
+++ b/components/bootloader/subproject/main/bootloader_start.c
@@ -0,0 +1,126 @@
+// Copyright 2015-2016 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "esp_log.h"
+#include "esp32/rom/gpio.h"
+#include "esp32/rom/spi_flash.h"
+#include "bootloader_config.h"
+#include "bootloader_init.h"
+#include "bootloader_utility.h"
+#include "bootloader_common.h"
+#include "sdkconfig.h"
+#include "esp_image_format.h"
+
+static const char* TAG = "boot";
+
+static int select_partition_number (bootloader_state_t *bs);
+static int selected_boot_partition(const bootloader_state_t *bs);
+#define LWS_MAGIC_REBOOT_TYPE_ADS 0x50001ffc /* Address (not a magic value) of the 32-bit word read in selected_boot_partition() */
+#define LWS_MAGIC_REBOOT_TYPE_REQ_FACTORY 0xb00bcafe /* Value at ..._ADS that makes selected_boot_partition() pick FACTORY_INDEX */
+#define LWS_MAGIC_REBOOT_TYPE_FORCED_FACTORY 0xfaceb00b /* Not referenced in this file */
+#define LWS_MAGIC_REBOOT_TYPE_FORCED_FACTORY_BUTTON 0xf0cedfac /* Not referenced in this file */
+#define LWS_MAGIC_REBOOT_TYPE_REQ_FACTORY_ERASE_OTA 0xfac0eeee /* Not referenced in this file */
+
+/*
+ * We arrive here after the ROM bootloader finished loading this second stage bootloader from flash.
+ * The hardware is mostly uninitialized, flash cache is down and the app CPU is in reset.
+ * We do have a stack, so we can do the initialization in C.
+ */
+void __attribute__((noreturn)) call_start_cpu0()
+{
+    // Step 1: bring up the hardware; on failure call bootloader_reset().
+    if (ESP_OK != bootloader_init()) {
+        bootloader_reset();
+    }
+
+    // Step 2: work out which partition to boot; with no valid index call bootloader_reset().
+    bootloader_state_t boot_state = { 0 };
+    const int partition_index = select_partition_number(&boot_state);
+    if (INVALID_INDEX == partition_index) {
+        bootloader_reset();
+    }
+
+    // Step 3: load the app image for the chosen partition index.
+    bootloader_utility_load_boot_image(&boot_state, partition_index);
+}
+
+// Loads the partition table via bootloader_utility_load_partition_table(bs), then returns
+// the index chosen by selected_boot_partition(); INVALID_INDEX if the table cannot be loaded.
+static int select_partition_number (bootloader_state_t *bs)
+{
+    if (bootloader_utility_load_partition_table(bs)) {
+        // Table is available: apply the boot-selection rules.
+        return selected_boot_partition(bs);
+    }
+
+    ESP_LOGE(TAG, "load partition table error!");
+    return INVALID_INDEX;
+}
+
+/*
+ * Selects a boot partition.
+ * Starts from the index given by bootloader_utility_get_selected_boot_partition(), then checks
+ * the override conditions in order: factory-reset GPIO, test-app GPIO, and finally the
+ * "request factory" magic word stored at LWS_MAGIC_REBOOT_TYPE_ADS.
+ * Returns the partition index to boot, or INVALID_INDEX on an unrecoverable failure.
+ */
+static int selected_boot_partition(const bootloader_state_t *bs)
+{
+    int boot_index = bootloader_utility_get_selected_boot_partition(bs);
+    if (boot_index == INVALID_INDEX) {
+        return boot_index; // Unrecoverable failure (not due to corrupt ota data or bad partition contents)
+    }
+    // Factory firmware.
+#ifdef CONFIG_BOOTLOADER_FACTORY_RESET
+    if (bootloader_common_check_long_hold_gpio(CONFIG_BOOTLOADER_NUM_PIN_FACTORY_RESET, CONFIG_BOOTLOADER_HOLD_TIME_GPIO) == 1) {
+        ESP_LOGI(TAG, "Detect a condition of the factory reset");
+        bool ota_data_erase = false;
+#ifdef CONFIG_BOOTLOADER_OTA_DATA_ERASE
+        ota_data_erase = true;
+#endif
+        const char *list_erase = CONFIG_BOOTLOADER_DATA_FACTORY_RESET;
+        ESP_LOGI(TAG, "Data partitions to erase: %s", list_erase);
+        if (bootloader_common_erase_part_type_data(list_erase, ota_data_erase) == false) {
+            ESP_LOGE(TAG, "Not all partitions were erased");
+        }
+        return bootloader_utility_get_selected_boot_partition(bs);
+    }
+#endif
+    // TEST firmware.
+#ifdef CONFIG_BOOTLOADER_APP_TEST
+    if (bootloader_common_check_long_hold_gpio(CONFIG_BOOTLOADER_NUM_PIN_APP_TEST, CONFIG_BOOTLOADER_HOLD_TIME_GPIO) == 1) {
+        ESP_LOGI(TAG, "Detect a boot condition of the test firmware");
+        if (bs->test.offset != 0) {
+            return TEST_APP_INDEX;
+        }
+        ESP_LOGE(TAG, "Test firmware is not found in partition table");
+        return INVALID_INDEX;
+    }
+#endif
+    // Fixed-address word (presumably set by the app before rebooting - confirm): read it as volatile.
+    const volatile uint32_t *p_force_factory_magic = (const volatile uint32_t *)LWS_MAGIC_REBOOT_TYPE_ADS;
+    if (*p_force_factory_magic == LWS_MAGIC_REBOOT_TYPE_REQ_FACTORY) {
+        boot_index = FACTORY_INDEX;
+    }
+    return boot_index;
+}
+
+// Returns _GLOBAL_REENT unconditionally, for any newlib functions that get linked into the bootloader.
+struct _reent* __getreent() {
+    return _GLOBAL_REENT;
+}
+
--- a/components/bootloader/subproject/main/component.mk
+++ b/components/bootloader/subproject/main/component.mk
@@ -0,0 +1,21 @@
+#
+# Main bootloader Makefile.
+#
+# Works like an ordinary component makefile, except that it also passes the
+# bootloader-specific linker scripts to the link through COMPONENT_ADD_LDFLAGS.
+#
+# Linker scripts for the bootloader; each becomes a "-T <script>" flag below, in the order listed.
+LINKER_SCRIPTS := \
+    $(IDF_TARGET).bootloader.ld \
+    $(IDF_TARGET).bootloader.rom.ld \
+    $(IDF_PATH)/components/esp_rom/$(IDF_TARGET)/ld/$(IDF_TARGET).rom.ld \
+    $(IDF_PATH)/components/esp_rom/$(IDF_TARGET)/ld/$(IDF_TARGET).rom.newlib-funcs.ld \
+    $(IDF_PATH)/components/$(IDF_TARGET)/ld/$(IDF_TARGET).peripherals.ld
+# The ROM SPI flash script is added only when CONFIG_SPI_FLASH_ROM_DRIVER_PATCH is not set.
+ifndef CONFIG_SPI_FLASH_ROM_DRIVER_PATCH
+LINKER_SCRIPTS += $(IDF_PATH)/components/esp_rom/$(IDF_TARGET)/ld/$(IDF_TARGET).rom.spiflash.ld
+endif
+# -L adds $(COMPONENT_PATH) to the linker search path (presumably so the two path-less scripts above are found there).
+COMPONENT_ADD_LDFLAGS += -L $(COMPONENT_PATH) $(addprefix -T ,$(LINKER_SCRIPTS))
+# Register the scripts as linker dependencies (presumably so a change to one forces a re-link; confirm in the build docs).
+COMPONENT_ADD_LINKER_DEPS := $(LINKER_SCRIPTS)
--- a/components/bootloader/subproject/main/esp32.bootloader.ld
+++ b/components/bootloader/subproject/main/esp32.bootloader.ld
@@ -0,0 +1,167 @@
+/*
+Linker file used to link the bootloader.
+*/
+
+
+/* Simplified memory map for the bootloader.
+
+   The main purpose is to make sure the bootloader can load into main memory
+   without overwriting itself. The regions are referenced by the SECTIONS command below.
+*/
+
+MEMORY
+{
+  /* I/O: 16 bytes (len = 0x10) starting at 0x3FF00000; no output section in this script is assigned to it. */
+  dport0_seg (RW) :                 	org = 0x3FF00000, len = 0x10
+  /* IRAM POOL1, used for APP CPU cache. The bootloader runs from here during the final stage of loading the app: the APP CPU is still held in reset then, and it is the main app that enables the APP CPU cache. */
+  iram_loader_seg (RWX) :           org = 0x40078000, len = 0x8000  /* 32KB, APP CPU cache */
+  /* 63 KB of IRAM (len = 0xfc00). We skip the first 1 KB so that the entry point is
+     not placed in the same range as the exception vectors in the app.
+     Otherwise idf_monitor decodes the ROM bootloader's "entry 0x40080xxx"
+     message as one of the exception vectors, which looks scary to users.
+  */
+  iram_seg (RWX) :                  org = 0x40080400, len = 0xfc00
+  /* 64 KB at the end of DRAM, after the ROM bootloader stack; holds .dram0.bss, .dram0.data and .dram0.rodata. */
+  dram_seg (RW) :                  	org = 0x3FFF0000, len = 0x10000
+}
+
+/* Entry point of the linked image: call_start_cpu0, a symbol defined outside this script. */
+ENTRY(call_start_cpu0);
+
+
+SECTIONS
+{
+  /* Loader code kept in iram_loader_seg: stray .iram1 code, linkonce/stub sections and the code of the libraries and objects listed below; spans _loader_text_start.._loader_text_end. */
+  .iram_loader.text :
+  {
+    . = ALIGN (16);
+    _loader_text_start = ABSOLUTE(.);
+    *(.stub .gnu.warning .gnu.linkonce.literal.* .gnu.linkonce.t.*.literal .gnu.linkonce.t.*)
+     *(.iram1 .iram1.*) /* catch stray IRAM_ATTR */
+    *liblog.a:(.literal .text .literal.* .text.*)
+    *libgcc.a:(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:bootloader_common.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:bootloader_flash.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:bootloader_random.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:bootloader_utility.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:bootloader_sha.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:esp_image_format.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:flash_encrypt.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:flash_partitions.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:secure_boot.*(.literal .text .literal.* .text.*)
+    *libbootloader_support.a:secure_boot_signatures.*(.literal .text .literal.* .text.*)
+    *libmicro-ecc.a:*.*(.literal .text .literal.* .text.*)
+    *libspi_flash.a:*.*(.literal .text .literal.* .text.*)
+    *libsoc.a:rtc_wdt.*(.literal .text .literal.* .text.*)
+    *libefuse.a:*.*(.literal .text .literal.* .text.*)
+    *(.fini.literal)
+    *(.fini)
+    *(.gnu.version)
+    _loader_text_end = ABSOLUTE(.);
+  } > iram_loader_seg
+  /* Entry and .init code; the first output section assigned to iram_seg. NOTE(review): a second ".iram.text" statement later in this script collects the remaining code - confirm how the linker combines the two. */
+  .iram.text :
+  {
+    . = ALIGN (16);
+    *(.entry.text)
+    *(.init.literal)
+    *(.init)
+  } > iram_seg
+
+
+  /* Shared RAM: uninitialised-data sections (.bss, .sbss, COMMON, ...) in dram_seg, marked NOLOAD; _bss_start/_bss_end give the 8-byte-aligned bounds (presumably cleared by startup code - confirm). */
+  .dram0.bss (NOLOAD) :
+  {
+    . = ALIGN (8);
+    _bss_start = ABSOLUTE(.);
+    *(.dynsbss)
+    *(.sbss)
+    *(.sbss.*)
+    *(.gnu.linkonce.sb.*)
+    *(.scommon)
+    *(.sbss2)
+    *(.sbss2.*)
+    *(.gnu.linkonce.sb2.*)
+    *(.dynbss)
+    *(.bss)
+    *(.bss.*)
+    *(.gnu.linkonce.b.*)
+    *(COMMON)
+    . = ALIGN (8);
+    _bss_end = ABSOLUTE(.);
+  } >dram_seg
+  /* Initialised data (.data, .sdata, .sdata2, linkonce variants and .jcr) in dram_seg, bracketed by _data_start/_data_end. */
+  .dram0.data :
+  {
+    _data_start = ABSOLUTE(.);
+    *(.data)
+    *(.data.*)
+    *(.gnu.linkonce.d.*)
+    *(.data1)
+    *(.sdata)
+    *(.sdata.*)
+    *(.gnu.linkonce.s.*)
+    *(.sdata2)
+    *(.sdata2.*)
+    *(.gnu.linkonce.s2.*)
+    *(.jcr)
+    _data_end = ABSOLUTE(.);
+  } >dram_seg
+  /* Read-only data in dram_seg: .rodata, exception tables, constructor/destructor tables and, after _rodata_end, the .lit4 literals. */
+  .dram0.rodata :
+  {
+    _rodata_start = ABSOLUTE(.);
+    *(.rodata)
+    *(.rodata.*)
+    *(.gnu.linkonce.r.*)
+    *(.rodata1)
+    __XT_EXCEPTION_TABLE_ = ABSOLUTE(.);
+    *(.xt_except_table)
+    *(.gcc_except_table)
+    *(.gnu.linkonce.e.*)
+    *(.gnu.version_r)
+    *(.eh_frame)
+    . = (. + 3) & ~ 3; /* round the location counter up to a 4-byte boundary */
+    /*  C++ constructor and destructor tables, properly ordered; only the .ctors entries lie between __init_array_start and __init_array_end:  */
+    __init_array_start = ABSOLUTE(.);
+    KEEP (*crtbegin.*(.ctors))
+    KEEP (*(EXCLUDE_FILE (*crtend.*) .ctors))
+    KEEP (*(SORT(.ctors.*)))
+    KEEP (*(.ctors))
+    __init_array_end = ABSOLUTE(.);
+    KEEP (*crtbegin.*(.dtors))
+    KEEP (*(EXCLUDE_FILE (*crtend.*) .dtors))
+    KEEP (*(SORT(.dtors.*)))
+    KEEP (*(.dtors))
+    /*  C++ exception handlers table, between __XT_EXCEPTION_DESCS_ and __XT_EXCEPTION_DESCS_END__:  */
+    __XT_EXCEPTION_DESCS_ = ABSOLUTE(.);
+    *(.xt_except_desc)
+    *(.gnu.linkonce.h.*)
+    __XT_EXCEPTION_DESCS_END__ = ABSOLUTE(.);
+    *(.xt_except_desc_end)
+    *(.dynamic)
+    *(.gnu.version_d)
+    _rodata_end = ABSOLUTE(.);
+    /* Literals are also RO data, but they are placed after _rodata_end, between _lit4_start and _lit4_end. */
+    _lit4_start = ABSOLUTE(.);
+    *(*.lit4)
+    *(.lit4.*)
+    *(.gnu.linkonce.lit4.*)
+    _lit4_end = ABSOLUTE(.);
+    . = ALIGN(4);
+  } >dram_seg
+  /* Remaining code in iram_seg: the .literal/.text input sections not already placed by .iram_loader.text above, plus .iram code; _stext/_text_start and _text_end/_etext mark its bounds. */
+  .iram.text :
+  {
+    _stext = .;
+    _text_start = ABSOLUTE(.);
+    *(.literal .text .literal.* .text.* .stub .gnu.warning .gnu.linkonce.literal.* .gnu.linkonce.t.*.literal .gnu.linkonce.t.*)
+    *(.iram .iram.*) /* catch stray IRAM_ATTR */
+    *(.fini.literal)
+    *(.fini)
+    *(.gnu.version)
+    _text_end = ABSOLUTE(.);
+    _etext = .;
+  } > iram_seg
+
+}
--- a/components/bootloader/subproject/main/esp32.bootloader.rom.ld
+++ b/components/bootloader/subproject/main/esp32.bootloader.rom.ld
@@ -0,0 +1,9 @@
+PROVIDE ( ets_update_cpu_frequency = 0x40008550 ); /* Updates g_ticks_per_us on the current CPU only; not on the other core */
+PROVIDE ( MD5Final = 0x4005db1c );
+PROVIDE ( MD5Init = 0x4005da7c );
+PROVIDE ( MD5Update = 0x4005da9c );
+/* PROVIDE() above defines a symbol only if it is referenced but not defined by any linked object.
+   The bootloader also uses the following functions from the Xtensa HAL library; these plain assignments always define their symbols. */
+xthal_get_ccount = 0x4000c050;
+xthal_get_ccompare = 0x4000c078;
+xthal_set_ccompare = 0x4000c058;