From d225e673c71bc57066fa4260d95316bf73b67f95 Mon Sep 17 00:00:00 2001 From: Vadim Vetrov Date: Sun, 2 Feb 2025 19:08:47 +0300 Subject: [PATCH 1/4] Implement Aho-Corasick algorithm --- Kbuild | 2 +- src/trie.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++++ src/trie.h | 99 ++++++++++++++++++++++++++ src/types.h | 1 + test/main_fn.c | 1 + test/trie.c | 104 ++++++++++++++++++++++++++++ uspace.mk | 2 +- 7 files changed, 390 insertions(+), 2 deletions(-) create mode 100644 src/trie.c create mode 100644 src/trie.h create mode 100644 test/trie.c diff --git a/Kbuild b/Kbuild index de094d3..adb8dd1 100644 --- a/Kbuild +++ b/Kbuild @@ -1,3 +1,3 @@ obj-m := kyoutubeUnblock.o -kyoutubeUnblock-objs := src/kytunblock.o src/mangle.o src/quic.o src/quic_crypto.o src/utils.o src/tls.o src/getopt.o src/inet_ntop.o src/args.o deps/cyclone/aes.o deps/cyclone/cpu_endian.o deps/cyclone/ecb.o deps/cyclone/gcm.o deps/cyclone/hkdf.o deps/cyclone/hmac.o deps/cyclone/sha256.o +kyoutubeUnblock-objs := src/kytunblock.o src/mangle.o src/quic.o src/quic_crypto.o src/utils.o src/tls.o src/getopt.o src/inet_ntop.o src/args.o src/trie.o deps/cyclone/aes.o deps/cyclone/cpu_endian.o deps/cyclone/ecb.o deps/cyclone/gcm.o deps/cyclone/hkdf.o deps/cyclone/hmac.o deps/cyclone/sha256.o ccflags-y := -std=gnu99 -DKERNEL_SPACE -Wno-error -Wno-declaration-after-statement -I$(src)/src -I$(src)/deps/cyclone/include diff --git a/src/trie.c b/src/trie.c new file mode 100644 index 0000000..1d4af2a --- /dev/null +++ b/src/trie.c @@ -0,0 +1,183 @@ +/* + youtubeUnblock - https://github.com/Waujito/youtubeUnblock + + Copyright (C) 2024-2025 Vadim Vetrov + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ + +/** + * This is a slightly optimized Aho-Corasick implementation + * + * Big thanks to e-maxx http://e-maxx.ru/algo/aho_corasick + * for the best description and reference code samples + */ + +#include "trie.h" + +int trie_init(struct trie_container *trie) { + void *vx = malloc(sizeof(struct trie_vertex) * TRIE_STARTSZ); + if (vx == NULL) { + return -ENOMEM; + } + trie->vx = vx; + trie->arrsz = TRIE_STARTSZ; + trie->sz = 1; + + struct trie_vertex *trx = trie->vx; + trx->p = trx->link = -1; + trx->leaf = 0; + trx->depth = 0; + trx->pch = 0; + memset(trx->go, 0xff, sizeof(trie->vx[0].go)); + + return 0; +} + +void trie_destroy(struct trie_container *trie) { + trie->arrsz = 0; + trie->sz = 0; + free(trie->vx); + trie->vx = NULL; +} + +int trie_push_vertex(struct trie_container *trie) { + if (trie->sz == NMAX - 1) { + return -EINVAL; + } + + if (trie->arrsz == trie->sz) { // realloc + void *pt = realloc(trie->vx, + sizeof(struct trie_vertex) * trie->arrsz * 2); + if (pt == NULL) { + return -ENOMEM; + } + + trie->arrsz *= 2; + trie->vx = pt; + } + + return trie->sz++; +} + + +int trie_add_string(struct trie_container *trie, + const uint8_t *str, size_t strlen) { + int v = 0; + int nv; + + for (size_t i = 0; i < strlen; ++i) { + uint8_t c = str[i]; + if (c >= TRIE_ALPHABET) { + return -EINVAL; + } + + if (trie->vx[v].go[c] == -1) { + nv = trie_push_vertex(trie); + if (nv < 0) { + return nv; + } + struct trie_vertex *tvx = trie->vx + nv; + + memset(tvx->go, 0xff, sizeof(tvx->go)); + tvx->link = -1; + tvx->p = v; + tvx->depth = trie->vx[v].depth + 1; + tvx->leaf = 0; + tvx->pch = c; + trie->vx[v].go[c] = nv; + } + v = 
trie->vx[v].go[c]; + } + + if (v != 0) { + trie->vx[v].leaf = 1; + } + + return 0; +} + +static int trie_go(struct trie_container *trie, + int v, uint8_t c); + +static int trie_get_link(struct trie_container *trie, + int v) { + struct trie_vertex *tvx = trie->vx + v; + + if (tvx->link == -1) { + if (v == 0 || tvx->p == 0) { + tvx->link = 0; + } else { + tvx->link = trie_go(trie, + trie_get_link(trie, tvx->p), tvx->pch); + } + } + + return tvx->link; +} + +static int trie_go(struct trie_container *trie, int v, uint8_t c) { + struct trie_vertex *tvx = trie->vx + v; + + if (tvx->go[c] == -1) { + tvx->go[c] = v == 0 ? 0 : + trie_go(trie, trie_get_link(trie, v), c); + } + + return tvx->go[c]; +} + + +int trie_process_str( + struct trie_container *trie, + const uint8_t *str, size_t strlen, + int flags, + size_t *offset, size_t *offlen +) { + int v = 0; + size_t i = 0; + uint8_t c; + int len; + + for (; i < strlen; ++i) { + c = str[i]; + if (c >= TRIE_ALPHABET) { + v = 0; + continue; + } + + v = trie->vx[v].go[c] != -1 ? trie->vx[v].go[c] : + trie_go(trie, v, str[i]); + + if (trie->vx[v].leaf && + ((flags & TRIE_OPT_MAP_TO_END) != TRIE_OPT_MAP_TO_END || + i == strlen - 1) + ) { + ++i; + break; + } + } + + len = trie->vx[v].depth; + if ( trie->vx[v].leaf && + i >= len + ) { + size_t sp = i - len; + *offset = sp; + *offlen = len; + return 1; + } + + return 0; +} diff --git a/src/trie.h b/src/trie.h new file mode 100644 index 0000000..3bcceea --- /dev/null +++ b/src/trie.h @@ -0,0 +1,99 @@ +/* + youtubeUnblock - https://github.com/Waujito/youtubeUnblock + + Copyright (C) 2024-2025 Vadim Vetrov + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ + +/** + * This is a slightly optimized Aho-Corasick implementation + * + * Big thanks to e-maxx http://e-maxx.ru/algo/aho_corasick + * for the best description and reference code samples + * + */ + +/** + * + * This algorithm allows us to search inside the string + * for a list of patterns in linear time. + * + * The algorithm will lazily initialize itself while + * youtubeUnblock works. Lazy initialization is considered + * safe for multithreading and operates without atomic operations + * or synchronization primitives. + * + */ + +#ifndef TRIE_H +#define TRIE_H + +#include "types.h" + +// ASCII alphabet +#define TRIE_ALPHABET 128 +// Maximum number of vertices in the trie +#define NMAX ((1 << 15) - 1) + +struct trie_vertex { + int leaf; // boolean flag + int depth; // depth of tree (length of substring) + int p; // parent + uint8_t pch; // vertex char + int link; // sufflink + int16_t go[TRIE_ALPHABET]; // dynamically filled pushes +}; + +struct trie_container { + struct trie_vertex *vx; + size_t arrsz; + size_t sz; +}; + +#define TRIE_STARTSZ 32 +int trie_init(struct trie_container *trie); +void trie_destroy(struct trie_container *trie); + +/** + * + * Increases trie vertex container size. + * Returns new vertex index or ret < 0 on error + * + */ +int trie_push_vertex(struct trie_container *trie); +int trie_add_string(struct trie_container *trie, + const uint8_t *str, size_t strlen); + +/** + * Aligns the pattern to the end + */ +#define TRIE_OPT_MAP_TO_END (1 << 1) + +/** + * Searches the string for the patterns. + * flags is TRIE_OPT binary mask with options for search. 
+ * offset, offlen are destination variables with + * offset of the given string and length of target. + * + * returns 1 if target found, 0 otherwise + */ +int trie_process_str( + struct trie_container *trie, + const uint8_t *str, size_t strlen, + int flags, + size_t *offset, size_t *offlen +); + +#endif diff --git a/src/types.h b/src/types.h index 0dc9705..09fba6d 100644 --- a/src/types.h +++ b/src/types.h @@ -68,6 +68,7 @@ typedef __s16 int_least16_t; /* integer of >= 16 bits */ #define free kfree #define malloc(size) kmalloc((size), GFP_KERNEL) +#define realloc(pt, size) krealloc((pt), (size), GFP_KERNEL) #define calloc(n, size) kcalloc((n), (size), GFP_KERNEL) #define ip6_hdr ipv6hdr diff --git a/test/main_fn.c b/test/main_fn.c index d068a95..48b69cc 100644 --- a/test/main_fn.c +++ b/test/main_fn.c @@ -10,6 +10,7 @@ static void RunAllTests(void) { RUN_TEST_GROUP(TLSTest) RUN_TEST_GROUP(QuicTest); + RUN_TEST_GROUP(TrieTest); } int main(int argc, const char * argv[]) diff --git a/test/trie.c b/test/trie.c new file mode 100644 index 0000000..642da76 --- /dev/null +++ b/test/trie.c @@ -0,0 +1,104 @@ +#include "unity.h" +#include "unity_fixture.h" + +#include "trie.h" + +TEST_GROUP(TrieTest); + +TEST_SETUP(TrieTest) +{ +} + +TEST_TEAR_DOWN(TrieTest) +{ +} + +const char ASTR[] = "abacaba"; +const char BSTR[] = "BABABABA"; +const char CSTR[] = "abracadabra"; + +const char tstr[] = "aBABABABDADAabacabracadabraabbbabacabaaaaaabacaba"; + + +TEST(TrieTest, Trie_string_adds) +{ + int ret; + size_t offset; + size_t offlen; + struct trie_container trie; + + ret = trie_init(&trie); + TEST_ASSERT_EQUAL(0, ret); + ret = trie_add_string(&trie, (uint8_t *)ASTR, sizeof(ASTR) - 1); + TEST_ASSERT_EQUAL(0, ret); + ret = trie_add_string(&trie, (uint8_t *)BSTR, sizeof(BSTR) - 1); + TEST_ASSERT_EQUAL(0, ret); + ret = trie_add_string(&trie, (uint8_t *)CSTR, sizeof(CSTR) - 1); + TEST_ASSERT_EQUAL(0, ret); + + TEST_ASSERT_EQUAL(25, trie.sz); + + trie_destroy(&trie); +} + +TEST(TrieTest, 
Trie_string_finds) +{ + int ret; + size_t offset; + size_t offlen; + struct trie_container trie; + + ret = trie_init(&trie); + ret = trie_add_string(&trie, (uint8_t *)ASTR, sizeof(ASTR) - 1); + ret = trie_add_string(&trie, (uint8_t *)BSTR, sizeof(BSTR) - 1); + ret = trie_add_string(&trie, (uint8_t *)CSTR, sizeof(CSTR) - 1); + + ret = trie_process_str(&trie, + (uint8_t *)tstr, sizeof(tstr) - 1, + 0, &offset, &offlen + ); + TEST_ASSERT_EQUAL(1, ret); + TEST_ASSERT_EQUAL(11, offlen); + TEST_ASSERT_EQUAL_STRING_LEN("abracadabra", tstr + offset, offlen); + + trie_destroy(&trie); +} + +TEST(TrieTest, Trie_string_finds_opt_end) +{ + int ret; + size_t offset; + size_t offlen; + struct trie_container trie; + + ret = trie_init(&trie); + ret = trie_add_string(&trie, (uint8_t *)ASTR, sizeof(ASTR) - 1); + ret = trie_add_string(&trie, (uint8_t *)BSTR, sizeof(BSTR) - 1); + ret = trie_add_string(&trie, (uint8_t *)CSTR, sizeof(CSTR) - 1); + + ret = trie_process_str(&trie, + (uint8_t *)tstr, sizeof(tstr) - 1, + TRIE_OPT_MAP_TO_END, + &offset, &offlen + ); + TEST_ASSERT_EQUAL(1, ret); + TEST_ASSERT_EQUAL(7, offlen); + TEST_ASSERT_EQUAL_STRING_LEN("abacaba", tstr + offset, offlen); + + ret = trie_process_str(&trie, + (uint8_t *)tstr, sizeof(tstr), + TRIE_OPT_MAP_TO_END, + &offset, &offlen + ); + TEST_ASSERT_EQUAL(0, ret); + + trie_destroy(&trie); +} + + +TEST_GROUP_RUNNER(TrieTest) +{ + RUN_TEST_CASE(TrieTest, Trie_string_adds); + RUN_TEST_CASE(TrieTest, Trie_string_finds); + RUN_TEST_CASE(TrieTest, Trie_string_finds_opt_end); +} diff --git a/uspace.mk b/uspace.mk index a1f8690..3699cff 100644 --- a/uspace.mk +++ b/uspace.mk @@ -34,7 +34,7 @@ export CC CCLD LD CFLAGS LDFLAGS LIBNFNETLINK_CFLAGS LIBNFNETLINK_LIBS LIBMNL_CF APP:=$(BUILD_DIR)/youtubeUnblock TEST_APP:=$(BUILD_DIR)/testYoutubeUnblock -SRCS := mangle.c args.c utils.c quic.c tls.c getopt.c quic_crypto.c inet_ntop.c +SRCS := mangle.c args.c utils.c quic.c tls.c getopt.c quic_crypto.c inet_ntop.c trie.c OBJS := 
$(SRCS:%.c=$(BUILD_DIR)/%.o) APP_EXEC := youtubeUnblock.c APP_OBJ := $(APP_EXEC:%.c=$(BUILD_DIR)/%.o) From f7d0bed7aa7b206fb03fbbbb31173537b3e50921 Mon Sep 17 00:00:00 2001 From: Vadim Vetrov Date: Sun, 2 Feb 2025 23:34:10 +0300 Subject: [PATCH 2/4] Use Aho-Corasick algorithm in tls parsing --- src/args.c | 66 +++++++++--------------------------- src/config.h | 11 +++--- src/tls.c | 96 ++++++++++++++++------------------------------------ src/trie.c | 16 ++++++++- src/trie.h | 7 ---- test/tls.c | 15 ++++---- test/trie.c | 43 +++++++++++++++++++++++ 7 files changed, 117 insertions(+), 137 deletions(-) diff --git a/src/args.c b/src/args.c index b8b66d4..d18b026 100644 --- a/src/args.c +++ b/src/args.c @@ -94,11 +94,9 @@ close_file: } #endif -static int parse_sni_domains(struct domains_list **dlist, const char *domains_str, size_t domains_strlen) { - // Empty and shouldn't be used - struct domains_list ndomain = {0}; - struct domains_list *cdomain = &ndomain; - +static int parse_sni_domains(struct trie_container *trie, const char *domains_str, size_t domains_strlen) { + trie_init(trie); + unsigned int j = 0; for (unsigned int i = 0; i <= domains_strlen; i++) { if (( i == domains_strlen || @@ -119,38 +117,17 @@ static int parse_sni_domains(struct domains_list **dlist, const char *domains_st unsigned int domain_len = (i - j); const char *domain_startp = domains_str + j; - struct domains_list *edomain = malloc(sizeof(struct domains_list)); - *edomain = (struct domains_list){0}; - if (edomain == NULL) { - return -ENOMEM; - } - - edomain->domain_len = domain_len; - edomain->domain_name = malloc(domain_len + 1); - if (edomain->domain_name == NULL) { - return -ENOMEM; - } - - strncpy(edomain->domain_name, domain_startp, domain_len); - edomain->domain_name[domain_len] = '\0'; - cdomain->next = edomain; - cdomain = edomain; + trie_add_string(trie, (const uint8_t *)domain_startp, domain_len); j = i + 1; } } - *dlist = ndomain.next; return 0; } -static void 
free_sni_domains(struct domains_list *dlist) { - for (struct domains_list *ldl = dlist; ldl != NULL;) { - struct domains_list *ndl = ldl->next; - SFREE(ldl->domain_name); - SFREE(ldl); - ldl = ndl; - } +static void free_sni_domains(struct trie_container *trie) { + trie_destroy(trie); } static long parse_numeric_option(const char* value) { @@ -633,7 +610,7 @@ int yparse_args(struct config_t *config, int argc, char *argv[]) { break; case OPT_SNI_DOMAINS: - free_sni_domains(sect_config->sni_domains); + free_sni_domains(&sect_config->sni_domains); sect_config->all_domains = 0; if (!strcmp(optarg, "all")) { sect_config->all_domains = 1; @@ -649,7 +626,7 @@ int yparse_args(struct config_t *config, int argc, char *argv[]) { goto error; #else { - free_sni_domains(sect_config->sni_domains); + free_sni_domains(&sect_config->sni_domains); ret = read_file(optarg); if (ret < 0) { goto error; @@ -662,7 +639,7 @@ int yparse_args(struct config_t *config, int argc, char *argv[]) { } #endif case OPT_EXCLUDE_DOMAINS: - free_sni_domains(sect_config->exclude_sni_domains); + free_sni_domains(&sect_config->exclude_sni_domains); ret = parse_sni_domains(&sect_config->exclude_sni_domains, optarg, strlen(optarg)); if (ret < 0) goto error; @@ -674,7 +651,7 @@ int yparse_args(struct config_t *config, int argc, char *argv[]) { goto error; #else { - free_sni_domains(sect_config->exclude_sni_domains); + free_sni_domains(&sect_config->exclude_sni_domains); ret = read_file(optarg); if (ret < 0) { goto error; @@ -1068,20 +1045,11 @@ static size_t print_config_section(const struct section_config_t *section, char if (section->all_domains) { print_cnf_buf("--sni-domains=all"); - } else if (section->sni_domains != NULL) { - print_cnf_raw("--sni-domains="); - - for (struct domains_list *sne = section->sni_domains; sne != NULL; sne = sne->next) { - print_cnf_raw("%s,", sne->domain_name); - } - print_cnf_raw(" "); + } else if (section->sni_domains.vx != NULL) { + print_cnf_buf("--sni-domains=", section->sni_domains.sz); } - 
if (section->exclude_sni_domains != NULL) { - print_cnf_raw("--exclude-domains="); - for (struct domains_list *sne = section->exclude_sni_domains; sne != NULL; sne = sne->next) { - print_cnf_raw("%s,", sne->domain_name); - } - print_cnf_raw(" "); + if (section->exclude_sni_domains.vx != NULL) { + print_cnf_buf("--exclude-domains=", section->sni_domains.sz); } switch(section->sni_detection) { @@ -1281,10 +1249,8 @@ void free_config_section(struct section_config_t *section) { SFREE(section->udp_dport_range); } - free_sni_domains(section->sni_domains); - section->sni_domains = NULL; - free_sni_domains(section->exclude_sni_domains); - section->exclude_sni_domains = NULL; + free_sni_domains(&section->sni_domains); + free_sni_domains(&section->exclude_sni_domains); section->fake_custom_pkt_sz = 0; SFREE(section->fake_custom_pkt); diff --git a/src/config.h b/src/config.h index 36579d0..f009791 100644 --- a/src/config.h +++ b/src/config.h @@ -25,6 +25,7 @@ #endif #include "types.h" +#include "trie.h" typedef int (*raw_send_t)(const unsigned char *data, size_t data_len); /** @@ -64,8 +65,10 @@ struct section_config_t { struct section_config_t *next; struct section_config_t *prev; - struct domains_list *sni_domains; - struct domains_list *exclude_sni_domains; + // struct domains_list *sni_domains; + // struct domains_list *exclude_sni_domains; + struct trie_container sni_domains; + struct trie_container exclude_sni_domains; unsigned int all_domains; int tls_enabled; @@ -237,8 +240,8 @@ enum { }; #define default_section_config { \ - .sni_domains = NULL, \ - .exclude_sni_domains = NULL, \ + .sni_domains = {0}, \ + .exclude_sni_domains = {0}, \ .all_domains = 0, \ .tls_enabled = 1, \ .frag_sni_reverse = 1, \ diff --git a/src/tls.c b/src/tls.c index 73721b2..73eb118 100644 --- a/src/tls.c +++ b/src/tls.c @@ -33,6 +33,8 @@ int bruteforce_analyze_sni_str( const uint8_t *data, size_t dlen, struct tls_verdict *vrd ) { + size_t offset, offlen; + int ret; *vrd = (struct tls_verdict){0}; if 
(dlen <= 1) { @@ -47,50 +49,17 @@ int bruteforce_analyze_sni_str( vrd->target_sni_len = vrd->sni_len; return 0; } - int max_domain_len = 0; - for (struct domains_list *sne = section->sni_domains; sne != NULL; - sne = sne->next) { - max_domain_len = max((int)sne->domain_len, max_domain_len); - } - - size_t buf_size = max_domain_len + dlen + 1; - uint8_t *buf = malloc(buf_size); - if (buf == NULL) { - return -ENOMEM; - } - int *nzbuf = malloc(buf_size * sizeof(int)); - if (nzbuf == NULL) { - free(buf); - return -ENOMEM; + // It is safe for multithreading, so dp mutability is ok + ret = trie_process_str((struct trie_container *)&section->sni_domains, data, dlen, 0, &offset, &offlen); + if (ret) { + vrd->target_sni = 1; + vrd->sni_len = offlen; + vrd->sni_ptr = data + offset; + vrd->target_sni_ptr = vrd->sni_ptr; + vrd->target_sni_len = vrd->sni_len; } - for (struct domains_list *sne = section->sni_domains; sne != NULL; sne = sne->next) { - const char *domain_startp = sne->domain_name; - int domain_len = sne->domain_len; - - int *zbuf = (void *)nzbuf; - - memcpy(buf, domain_startp, domain_len); - memcpy(buf + domain_len, "#", 1); - memcpy(buf + domain_len + 1, data, dlen); - - z_function((char *)buf, zbuf, domain_len + 1 + dlen); - - for (size_t k = 0; k < domain_len + 1 + dlen; k++) { - if (zbuf[k] == domain_len) { - vrd->target_sni = 1; - vrd->sni_len = domain_len; - vrd->sni_ptr = data + (k - domain_len - 1); - vrd->target_sni_ptr = vrd->sni_ptr; - vrd->target_sni_len = vrd->sni_len; - goto return_vrd; - } - } - } -return_vrd: - free(buf); - free(nzbuf); return 0; } static int analyze_sni_str( @@ -98,42 +67,35 @@ static int analyze_sni_str( const char *sni_name, int sni_len, struct tls_verdict *vrd ) { + int ret; + size_t offset, offlen; + if (section->all_domains) { vrd->target_sni = 1; goto check_domain; } - - for (struct domains_list *sne = section->sni_domains; sne != NULL; sne = sne->next) { - const char *sni_startp = sni_name + sni_len - sne->domain_len; - const 
char *domain_startp = sne->domain_name; - if (sni_len >= sne->domain_len && - sni_len < 128 && - !strncmp(sni_startp, - domain_startp, - sne->domain_len)) { - vrd->target_sni = 1; - vrd->target_sni_ptr = (const uint8_t *)sni_startp; - vrd->target_sni_len = sne->domain_len; - break; - } + lgtrace_addp("abacaba"); + + // It is safe for multithreading, so dp mutability is ok + ret = trie_process_str((struct trie_container *)&section->sni_domains, + (const uint8_t *)sni_name, sni_len, TRIE_OPT_MAP_TO_END, &offset, &offlen); + if (ret) { + vrd->target_sni = 1; + vrd->target_sni_ptr = (const uint8_t *)sni_name + offset; + vrd->target_sni_len = offlen; } check_domain: if (vrd->target_sni == 1) { - for (struct domains_list *sne = section->exclude_sni_domains; sne != NULL; sne = sne->next) { - const char *sni_startp = sni_name + sni_len - sne->domain_len; - const char *domain_startp = sne->domain_name; - if (sni_len >= sne->domain_len && - sni_len < 128 && - !strncmp(sni_startp, - domain_startp, - sne->domain_len)) { - vrd->target_sni = 0; - lgdebug("Excluded SNI: %.*s", - vrd->sni_len, vrd->sni_ptr); - } + // It is safe for multithreading, so dp mutability is ok + ret = trie_process_str((struct trie_container *)&section->exclude_sni_domains, + (const uint8_t *)sni_name, sni_len, TRIE_OPT_MAP_TO_END, &offset, &offlen); + if (ret) { + vrd->target_sni = 0; + lgdebug("Excluded SNI: %.*s", + vrd->sni_len, vrd->sni_ptr); } } diff --git a/src/trie.c b/src/trie.c index 1d4af2a..30f08e2 100644 --- a/src/trie.c +++ b/src/trie.c @@ -52,7 +52,13 @@ void trie_destroy(struct trie_container *trie) { trie->vx = NULL; } -int trie_push_vertex(struct trie_container *trie) { +/** + * + * Increases trie vertex container size. 
+ * Returns new vertex index or ret < 0 on error + * + */ +static int trie_push_vertex(struct trie_container *trie) { if (trie->sz == NMAX - 1) { return -EINVAL; } @@ -74,6 +80,10 @@ int trie_push_vertex(struct trie_container *trie) { int trie_add_string(struct trie_container *trie, const uint8_t *str, size_t strlen) { + if (trie == NULL || trie->vx == NULL) { + return -EINVAL; + } + int v = 0; int nv; @@ -145,6 +155,10 @@ int trie_process_str( int flags, size_t *offset, size_t *offlen ) { + if (trie == NULL || trie->vx == NULL) { + return 0; + } + int v = 0; size_t i = 0; uint8_t c; diff --git a/src/trie.h b/src/trie.h index 3bcceea..7d8b565 100644 --- a/src/trie.h +++ b/src/trie.h @@ -66,13 +66,6 @@ struct trie_container { int trie_init(struct trie_container *trie); void trie_destroy(struct trie_container *trie); -/** - * - * Increases trie vertex container size. - * Returns new vertex index or ret < 0 on error - * - */ -int trie_push_vertex(struct trie_container *trie); int trie_add_string(struct trie_container *trie, const uint8_t *str, size_t strlen); diff --git a/test/tls.c b/test/tls.c index cd25aa0..f5803db 100644 --- a/test/tls.c +++ b/test/tls.c @@ -36,22 +36,21 @@ TEST(TLSTest, Test_CHLO_message_detect) TEST(TLSTest, Test_Bruteforce_detects) { struct tls_verdict tlsv; - struct domains_list dmns = { - .domain_name = "youtube.com", - .domain_len = 11, - .next = NULL - }; - sconf.sni_domains = &dmns; + struct trie_container trie; + int ret; + ret = trie_init(&trie); + ret = trie_add_string(&trie, (uint8_t *)"youtube.com", 11); + sconf.sni_domains = trie; - int ret = bruteforce_analyze_sni_str(&sconf, (const uint8_t *)tls_bruteforce_message, sizeof(tls_bruteforce_message) - 1, &tlsv); + ret = bruteforce_analyze_sni_str(&sconf, (const uint8_t *)tls_bruteforce_message, sizeof(tls_bruteforce_message) - 1, &tlsv); TEST_ASSERT_EQUAL(0, ret); TEST_ASSERT_EQUAL(11, tlsv.sni_len); TEST_ASSERT_EQUAL_STRING_LEN("youtube.com", tlsv.sni_ptr, 11); 
TEST_ASSERT_EQUAL_PTR(tls_bruteforce_message + sizeof(tls_bruteforce_message) - 12, tlsv.sni_ptr); + trie_destroy(&trie); } - TEST_GROUP_RUNNER(TLSTest) { RUN_TEST_CASE(TLSTest, Test_CHLO_message_detect); diff --git a/test/trie.c b/test/trie.c index 642da76..34c26fb 100644 --- a/test/trie.c +++ b/test/trie.c @@ -95,10 +95,53 @@ TEST(TrieTest, Trie_string_finds_opt_end) trie_destroy(&trie); } +TEST(TrieTest, Trie_single_vertex) +{ + int ret; + size_t offset; + size_t offlen; + struct trie_container trie; + + ret = trie_init(&trie); + + ret = trie_process_str(&trie, + (uint8_t *)tstr, sizeof(tstr) - 1, + 0, + &offset, &offlen + ); + TEST_ASSERT_EQUAL(0, ret); + + trie_destroy(&trie); + +} + +TEST(TrieTest, Trie_uninitialized) +{ + int ret; + size_t offset; + size_t offlen; + struct trie_container trie = {0}; + + // ret = trie_init(&trie); + + ret = trie_add_string(&trie, (uint8_t *)ASTR, sizeof(ASTR) - 1); + TEST_ASSERT_EQUAL(-EINVAL, ret); + + ret = trie_process_str(&trie, + (uint8_t *)tstr, sizeof(tstr) - 1, + 0, + &offset, &offlen + ); + TEST_ASSERT_EQUAL(0, ret); + +} + TEST_GROUP_RUNNER(TrieTest) { RUN_TEST_CASE(TrieTest, Trie_string_adds); RUN_TEST_CASE(TrieTest, Trie_string_finds); RUN_TEST_CASE(TrieTest, Trie_string_finds_opt_end); + RUN_TEST_CASE(TrieTest, Trie_single_vertex); + RUN_TEST_CASE(TrieTest, Trie_uninitialized); } From a7b689b320ac2ad36930fb08ee748fc211b2bf0c Mon Sep 17 00:00:00 2001 From: Vadim Vetrov Date: Mon, 3 Feb 2025 15:30:10 +0300 Subject: [PATCH 3/4] Fix warnings --- deps/cyclone/include/compiler_port.h | 2 +- src/tls.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/deps/cyclone/include/compiler_port.h b/deps/cyclone/include/compiler_port.h index 0c9f072..c63dc9d 100644 --- a/deps/cyclone/include/compiler_port.h +++ b/deps/cyclone/include/compiler_port.h @@ -157,7 +157,7 @@ typedef unsigned int uint_t; #elif defined(__GNUC__) int strcasecmp(const char *s1, const char *s2); int strncasecmp(const char *s1, const char 
*s2, size_t n); -#if !(_SVID_SOURCE || _BSD_SOURCE || _POSIX_C_SOURCE >= 1 || _XOPEN_SOURCE || _POSIX_SOURCE) +#if !(defined(_SVID_SOURCE) || defined(_BSD_SOURCE) || (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 1) || defined(_XOPEN_SOURCE) || defined(_POSIX_SOURCE)) char *strtok_r(char *s, const char *delim, char **last); #endif diff --git a/src/tls.c b/src/tls.c index 73eb118..9cd79f9 100644 --- a/src/tls.c +++ b/src/tls.c @@ -75,8 +75,6 @@ static int analyze_sni_str( goto check_domain; } - lgtrace_addp("abacaba"); - // It is safe for multithreading, so dp mutability is ok ret = trie_process_str((struct trie_container *)&section->sni_domains, (const uint8_t *)sni_name, sni_len, TRIE_OPT_MAP_TO_END, &offset, &offlen); From 6da6f63541cc285dd0b4fe01d34e347cdef07f53 Mon Sep 17 00:00:00 2001 From: Vadim Vetrov Date: Tue, 4 Feb 2025 18:40:59 +0300 Subject: [PATCH 4/4] Delete old domains data structures --- src/config.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/config.h b/src/config.h index f009791..dee0b03 100644 --- a/src/config.h +++ b/src/config.h @@ -53,20 +53,11 @@ struct udp_dport_range { uint16_t end; }; -struct domains_list { - char *domain_name; - uint16_t domain_len; - - struct domains_list *next; -}; - struct section_config_t { int id; struct section_config_t *next; struct section_config_t *prev; - struct domains_list *sni_domains; - struct domains_list *exclude_sni_domains; struct trie_container sni_domains; struct trie_container exclude_sni_domains; unsigned int all_domains;