Merge branch 'aho_corasick'

This commit is contained in:
Vadim Vetrov
2025-02-08 11:40:18 +03:00
12 changed files with 500 additions and 137 deletions

View File

@@ -98,11 +98,10 @@ close_file:
}
#endif
static int parse_sni_domains(struct domains_list **dlist, const char *domains_str, size_t domains_strlen) {
// Empty and shouldn't be used
struct domains_list ndomain = {0};
struct domains_list *cdomain = &ndomain;
static int parse_sni_domains(struct trie_container *trie, const char *domains_str, size_t domains_strlen) {
int ret;
trie_init(trie);
unsigned int j = 0;
for (unsigned int i = 0; i <= domains_strlen; i++) {
if (( i == domains_strlen ||
@@ -123,38 +122,22 @@ static int parse_sni_domains(struct domains_list **dlist, const char *domains_st
unsigned int domain_len = (i - j);
const char *domain_startp = domains_str + j;
struct domains_list *edomain = malloc(sizeof(struct domains_list));
if (edomain == NULL) {
return -ENOMEM;
}
*edomain = (struct domains_list){0};
edomain->domain_len = domain_len;
edomain->domain_name = malloc(domain_len + 1);
if (edomain->domain_name == NULL) {
return -ENOMEM;
ret = trie_add_string(trie, (const uint8_t *)domain_startp, domain_len);
if (ret < 0) {
lgerror(ret, "trie_add_string");
return ret;
}
strncpy(edomain->domain_name, domain_startp, domain_len);
edomain->domain_name[domain_len] = '\0';
cdomain->next = edomain;
cdomain = edomain;
j = i + 1;
}
}
*dlist = ndomain.next;
return 0;
}
static void free_sni_domains(struct domains_list *dlist) {
for (struct domains_list *ldl = dlist; ldl != NULL;) {
struct domains_list *ndl = ldl->next;
SFREE(ldl->domain_name);
SFREE(ldl);
ldl = ndl;
}
static void free_sni_domains(struct trie_container *trie) {
trie_destroy(trie);
}
static long parse_numeric_option(const char* value) {
@@ -640,7 +623,7 @@ int yparse_args(struct config_t *config, int argc, char *argv[]) {
break;
case OPT_SNI_DOMAINS:
free_sni_domains(sect_config->sni_domains);
free_sni_domains(&sect_config->sni_domains);
sect_config->all_domains = 0;
if (!strcmp(optarg, "all")) {
sect_config->all_domains = 1;
@@ -656,7 +639,7 @@ int yparse_args(struct config_t *config, int argc, char *argv[]) {
goto error;
#else
{
free_sni_domains(sect_config->sni_domains);
free_sni_domains(&sect_config->sni_domains);
ret = read_file(optarg);
if (ret < 0) {
goto error;
@@ -669,7 +652,7 @@ int yparse_args(struct config_t *config, int argc, char *argv[]) {
}
#endif
case OPT_EXCLUDE_DOMAINS:
free_sni_domains(sect_config->exclude_sni_domains);
free_sni_domains(&sect_config->exclude_sni_domains);
ret = parse_sni_domains(&sect_config->exclude_sni_domains, optarg, strlen(optarg));
if (ret < 0)
goto error;
@@ -681,7 +664,7 @@ int yparse_args(struct config_t *config, int argc, char *argv[]) {
goto error;
#else
{
free_sni_domains(sect_config->exclude_sni_domains);
free_sni_domains(&sect_config->exclude_sni_domains);
ret = read_file(optarg);
if (ret < 0) {
goto error;
@@ -1075,20 +1058,11 @@ static size_t print_config_section(const struct section_config_t *section, char
if (section->all_domains) {
print_cnf_buf("--sni-domains=all");
} else if (section->sni_domains != NULL) {
print_cnf_raw("--sni-domains=");
for (struct domains_list *sne = section->sni_domains; sne != NULL; sne = sne->next) {
print_cnf_raw("%s,", sne->domain_name);
}
print_cnf_raw(" ");
} else if (section->sni_domains.vx != NULL) {
print_cnf_buf("--sni-domains=<trie of %zu vertexes>", section->sni_domains.sz);
}
if (section->exclude_sni_domains != NULL) {
print_cnf_raw("--exclude-domains=");
for (struct domains_list *sne = section->exclude_sni_domains; sne != NULL; sne = sne->next) {
print_cnf_raw("%s,", sne->domain_name);
}
print_cnf_raw(" ");
// Report the size of the exclude trie itself; the include trie
// (sni_domains) is printed separately above.
if (section->exclude_sni_domains.vx != NULL) {
	print_cnf_buf("--exclude-domains=<trie of %zu vertexes>", section->exclude_sni_domains.sz);
}
switch(section->sni_detection) {
@@ -1288,10 +1262,8 @@ void free_config_section(struct section_config_t *section) {
SFREE(section->udp_dport_range);
}
free_sni_domains(section->sni_domains);
section->sni_domains = NULL;
free_sni_domains(section->exclude_sni_domains);
section->exclude_sni_domains = NULL;
free_sni_domains(&section->sni_domains);
free_sni_domains(&section->exclude_sni_domains);
section->fake_custom_pkt_sz = 0;
SFREE(section->fake_custom_pkt);

View File

@@ -25,6 +25,7 @@
#endif
#include "types.h"
#include "trie.h"
typedef int (*raw_send_t)(const unsigned char *data, size_t data_len);
/**
@@ -52,20 +53,13 @@ struct udp_dport_range {
uint16_t end;
};
struct domains_list {
char *domain_name;
uint16_t domain_len;
struct domains_list *next;
};
struct section_config_t {
int id;
struct section_config_t *next;
struct section_config_t *prev;
struct domains_list *sni_domains;
struct domains_list *exclude_sni_domains;
struct trie_container sni_domains;
struct trie_container exclude_sni_domains;
unsigned int all_domains;
int tls_enabled;
@@ -237,8 +231,8 @@ enum {
};
#define default_section_config { \
.sni_domains = NULL, \
.exclude_sni_domains = NULL, \
.sni_domains = {0}, \
.exclude_sni_domains = {0}, \
.all_domains = 0, \
.tls_enabled = 1, \
.frag_sni_reverse = 1, \

View File

@@ -33,6 +33,8 @@ int bruteforce_analyze_sni_str(
const uint8_t *data, size_t dlen,
struct tls_verdict *vrd
) {
size_t offset, offlen;
int ret;
*vrd = (struct tls_verdict){0};
if (dlen <= 1) {
@@ -47,50 +49,17 @@ int bruteforce_analyze_sni_str(
vrd->target_sni_len = vrd->sni_len;
return 0;
}
int max_domain_len = 0;
for (struct domains_list *sne = section->sni_domains; sne != NULL;
sne = sne->next) {
max_domain_len = max((int)sne->domain_len, max_domain_len);
}
size_t buf_size = max_domain_len + dlen + 1;
uint8_t *buf = malloc(buf_size);
if (buf == NULL) {
return -ENOMEM;
}
int *nzbuf = malloc(buf_size * sizeof(int));
if (nzbuf == NULL) {
free(buf);
return -ENOMEM;
// It is safe for multithreading, so dp mutability is ok
ret = trie_process_str((struct trie_container *)&section->sni_domains, data, dlen, 0, &offset, &offlen);
if (ret) {
vrd->target_sni = 1;
vrd->sni_len = offlen;
vrd->sni_ptr = data + offset;
vrd->target_sni_ptr = vrd->sni_ptr;
vrd->target_sni_len = vrd->sni_len;
}
for (struct domains_list *sne = section->sni_domains; sne != NULL; sne = sne->next) {
const char *domain_startp = sne->domain_name;
int domain_len = sne->domain_len;
int *zbuf = (void *)nzbuf;
memcpy(buf, domain_startp, domain_len);
memcpy(buf + domain_len, "#", 1);
memcpy(buf + domain_len + 1, data, dlen);
z_function((char *)buf, zbuf, domain_len + 1 + dlen);
for (size_t k = 0; k < domain_len + 1 + dlen; k++) {
if (zbuf[k] == domain_len) {
vrd->target_sni = 1;
vrd->sni_len = domain_len;
vrd->sni_ptr = data + (k - domain_len - 1);
vrd->target_sni_ptr = vrd->sni_ptr;
vrd->target_sni_len = vrd->sni_len;
goto return_vrd;
}
}
}
return_vrd:
free(buf);
free(nzbuf);
return 0;
}
static int analyze_sni_str(
@@ -98,42 +67,33 @@ static int analyze_sni_str(
const char *sni_name, int sni_len,
struct tls_verdict *vrd
) {
int ret;
size_t offset, offlen;
if (section->all_domains) {
vrd->target_sni = 1;
goto check_domain;
}
for (struct domains_list *sne = section->sni_domains; sne != NULL; sne = sne->next) {
const char *sni_startp = sni_name + sni_len - sne->domain_len;
const char *domain_startp = sne->domain_name;
if (sni_len >= sne->domain_len &&
sni_len < 128 &&
!strncmp(sni_startp,
domain_startp,
sne->domain_len)) {
vrd->target_sni = 1;
vrd->target_sni_ptr = (const uint8_t *)sni_startp;
vrd->target_sni_len = sne->domain_len;
break;
}
// It is safe for multithreading, so dp mutability is ok
ret = trie_process_str((struct trie_container *)&section->sni_domains,
(const uint8_t *)sni_name, sni_len, TRIE_OPT_MAP_TO_END, &offset, &offlen);
if (ret) {
vrd->target_sni = 1;
vrd->target_sni_ptr = (const uint8_t *)sni_name + offset;
vrd->target_sni_len = offlen;
}
check_domain:
if (vrd->target_sni == 1) {
for (struct domains_list *sne = section->exclude_sni_domains; sne != NULL; sne = sne->next) {
const char *sni_startp = sni_name + sni_len - sne->domain_len;
const char *domain_startp = sne->domain_name;
if (sni_len >= sne->domain_len &&
sni_len < 128 &&
!strncmp(sni_startp,
domain_startp,
sne->domain_len)) {
vrd->target_sni = 0;
lgdebug("Excluded SNI: %.*s",
vrd->sni_len, vrd->sni_ptr);
}
// It is safe for multithreading, so dp mutability is ok
ret = trie_process_str((struct trie_container *)&section->exclude_sni_domains,
(const uint8_t *)sni_name, sni_len, TRIE_OPT_MAP_TO_END, &offset, &offlen);
if (ret) {
vrd->target_sni = 0;
lgdebug("Excluded SNI: %.*s",
vrd->sni_len, vrd->sni_ptr);
}
}

197
src/trie.c Normal file
View File

@@ -0,0 +1,197 @@
/*
youtubeUnblock - https://github.com/Waujito/youtubeUnblock
Copyright (C) 2024-2025 Vadim Vetrov <vetrovvd@gmail.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
* This is slightly optimized Aho-Corasick implementation
*
* Big thanks to e-maxx http://e-maxx.ru/algo/aho_corasick
* for the best description and reference code samples
*/
#include "trie.h"
/**
 * Allocates the initial vertex array (TRIE_STARTSZ entries) and sets up
 * the root vertex at index 0.
 *
 * The root has no parent and no cached suffix link (both -1), is not a
 * leaf, has depth 0, and all of its transitions are marked as unset.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails; on failure
 * the container is left untouched.
 */
int trie_init(struct trie_container *trie) {
	struct trie_vertex *root = malloc(sizeof(struct trie_vertex) * TRIE_STARTSZ);
	if (root == NULL) {
		return -ENOMEM;
	}

	root->leaf = 0;
	root->depth = 0;
	root->pch = 0;
	root->p = -1;
	root->link = -1;
	// 0xff in every byte makes each int16_t slot read back as -1 (unset)
	memset(root->go, 0xff, sizeof(root->go));

	trie->vx = root;
	trie->arrsz = TRIE_STARTSZ;
	trie->sz = 1;

	return 0;
}
/**
 * Releases the vertex array and resets the container to the empty state
 * (vx == NULL, sz == 0, arrsz == 0), so that it can be passed to
 * trie_init() again.
 */
void trie_destroy(struct trie_container *trie) {
	free(trie->vx);
	trie->vx = NULL;
	trie->sz = 0;
	trie->arrsz = 0;
}
/**
 * Reserves one more vertex slot at the end of the vertex array,
 * doubling the array capacity when it is full.
 *
 * The contents of the new slot are NOT initialized here; the caller
 * must fill it in. Growing the array may move it, so any pointer into
 * trie->vx obtained before this call must be re-derived afterwards.
 *
 * Returns the index of the new vertex, -EINVAL when the vertex limit
 * is reached, or -ENOMEM when the array cannot be grown.
 */
static int trie_push_vertex(struct trie_container *trie) {
	if (trie->sz == NMAX - 1) {
		return -EINVAL;
	}

	if (trie->sz == trie->arrsz) {
		const size_t grown = trie->arrsz * 2;
		// Keep the old array intact if realloc fails
		void *mem = realloc(trie->vx, sizeof(struct trie_vertex) * grown);
		if (mem == NULL) {
			return -ENOMEM;
		}

		trie->vx = mem;
		trie->arrsz = grown;
	}

	const int idx = trie->sz;
	trie->sz++;
	return idx;
}
/**
 * Inserts a pattern of strlen bytes into the trie, creating missing
 * vertexes along the way and marking the final vertex as a leaf.
 * An empty pattern adds nothing (the root is never marked as a leaf).
 *
 * Returns 0 on success, -EINVAL if the trie is NULL/uninitialized or
 * the pattern contains a byte >= TRIE_ALPHABET, or the negative error
 * of the vertex allocation. On error, vertexes created for the already
 * processed prefix stay in the trie.
 *
 * NOTE: the go[] table is also where the search code caches its lazily
 * computed transitions, so an existing go[] entry is treated as a trie
 * edge here. Patterns are therefore expected to be added before the
 * trie is first searched.
 */
int trie_add_string(struct trie_container *trie,
		    const uint8_t *str, size_t strlen) {
	if (trie == NULL || trie->vx == NULL) {
		return -EINVAL;
	}

	int cur = 0;
	for (size_t pos = 0; pos < strlen; ++pos) {
		const uint8_t ch = str[pos];
		if (ch >= TRIE_ALPHABET) {
			return -EINVAL;
		}

		// Edge already present: just descend
		if (trie->vx[cur].go[ch] != -1) {
			cur = trie->vx[cur].go[ch];
			continue;
		}

		const int child = trie_push_vertex(trie);
		if (child < 0) {
			return child;
		}

		// The array may have been moved by the push,
		// so index it only from here on
		struct trie_vertex *node = &trie->vx[child];
		node->leaf = 0;
		node->depth = trie->vx[cur].depth + 1;
		node->p = cur;
		node->pch = ch;
		node->link = -1;
		memset(node->go, 0xff, sizeof(node->go));

		trie->vx[cur].go[ch] = child;
		cur = child;
	}

	if (cur != 0) {
		trie->vx[cur].leaf = 1;
	}

	return 0;
}
static int trie_go(struct trie_container *trie, int v, uint8_t c);

/**
 * Returns the suffix link of vertex v, computing and caching it in the
 * vertex on first request (link == -1 means "not computed yet").
 *
 * The root and its direct children link to the root. For any other
 * vertex the link is found by taking the parent's suffix link and
 * following the transition on the character that leads into v.
 */
static int trie_get_link(struct trie_container *trie, int v) {
	struct trie_vertex *node = &trie->vx[v];

	if (node->link != -1) {
		return node->link;
	}

	if (v == 0 || node->p == 0) {
		node->link = 0;
	} else {
		const int parent_link = trie_get_link(trie, node->p);
		node->link = trie_go(trie, parent_link, node->pch);
	}

	return node->link;
}
/**
 * Returns the automaton transition from vertex v on character c,
 * computing and caching it in go[c] on first request (-1 means
 * "not computed yet").
 *
 * A missing transition from the root leads back to the root; for any
 * other vertex it is resolved through the vertex's suffix link.
 * The caller must guarantee c < TRIE_ALPHABET.
 */
static int trie_go(struct trie_container *trie, int v, uint8_t c) {
	struct trie_vertex *node = &trie->vx[v];

	if (node->go[c] == -1) {
		if (v == 0) {
			node->go[c] = 0;
		} else {
			node->go[c] = trie_go(trie, trie_get_link(trie, v), c);
		}
	}

	return node->go[c];
}
/**
 * Returns the deepest pattern-terminating vertex on the suffix-link
 * chain of v (v itself included), or 0 if no pattern ends in state v.
 *
 * The automaton state is the longest pattern prefix that is a suffix
 * of the consumed input. A shorter pattern may still end here while the
 * state itself is an inner vertex of a longer pattern, so the chain of
 * suffix links has to be inspected as well. The root is never a leaf,
 * which makes 0 usable as the "no match" value.
 */
static int trie_find_match(struct trie_container *trie, int v) {
	while (v != 0 && !trie->vx[v].leaf) {
		v = trie_get_link(trie, v);
	}
	return v;
}

/**
 * Searches str (strlen bytes) for any of the patterns stored in trie.
 *
 * Without flags the search stops at the first input position where
 * some pattern ends. With TRIE_OPT_MAP_TO_END only a pattern that ends
 * exactly at the last byte of str is accepted. When several patterns
 * end at the accepted position, the longest one is reported.
 *
 * Bytes >= TRIE_ALPHABET cannot be part of a pattern; they reset the
 * automaton to the root.
 *
 * On a match, *offset receives the start of the matched pattern inside
 * str and *offlen its length; they are not written otherwise.
 *
 * Transitions and suffix links are computed lazily and cached inside
 * the trie, which is why the container is not const.
 *
 * Returns 1 if a pattern was found, 0 otherwise (including a NULL or
 * uninitialized trie).
 */
int trie_process_str(
	struct trie_container *trie,
	const uint8_t *str, size_t strlen,
	int flags,
	size_t *offset, size_t *offlen
) {
	if (trie == NULL || trie->vx == NULL) {
		return 0;
	}

	const int map_to_end =
		(flags & TRIE_OPT_MAP_TO_END) == TRIE_OPT_MAP_TO_END;
	int v = 0;

	for (size_t i = 0; i < strlen; ++i) {
		const uint8_t c = str[i];
		if (c >= TRIE_ALPHABET) {
			v = 0;
			continue;
		}

		v = trie->vx[v].go[c] != -1 ? trie->vx[v].go[c] :
			trie_go(trie, v, c);

		// In end-aligned mode only the final position can match
		if (map_to_end && i != strlen - 1) {
			continue;
		}

		const int m = trie_find_match(trie, v);
		if (m == 0) {
			continue;
		}

		const size_t len = (size_t)trie->vx[m].depth;
		const size_t end = i + 1;
		if (end < len) {
			// Cannot happen for a consistent trie; never underflow
			continue;
		}

		*offset = end - len;
		*offlen = len;
		return 1;
	}

	return 0;
}

92
src/trie.h Normal file
View File

@@ -0,0 +1,92 @@
/*
youtubeUnblock - https://github.com/Waujito/youtubeUnblock
Copyright (C) 2024-2025 Vadim Vetrov <vetrovvd@gmail.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
* This is slightly optimized Aho-Corasick implementation
*
* Big thanks to e-maxx http://e-maxx.ru/algo/aho_corasick
* for the best description and reference code samples
*
*/
/**
*
* This algorithm allows us to search inside the string
* for a list of patterns in the linear time.
*
* The automaton (transitions and suffix links) is initialized
* lazily, on first use, while youtubeUnblock is running. These lazy
* initializations are considered safe for multithreading and are
* performed without atomics or synchronization primitives.
*
*/
#ifndef TRIE_H
#define TRIE_H
#include "types.h"
// Alphabet size: only 7-bit ASCII bytes (0..127) can be part of a pattern.
// Each vertex holds one transition slot per alphabet character.
#define TRIE_ALPHABET 128
// Upper bound for the number of vertexes in the trie.
// (1 << 15) - 1 == 32767 is the largest value an int16_t transition
// slot (see trie_vertex.go) can hold.
#define NMAX ((1 << 15) - 1)
/**
 * A single vertex (state) of the trie / automaton.
 * Vertexes reference each other by index into trie_container.vx;
 * index 0 is the root.
 */
struct trie_vertex {
int leaf; // boolean flag: non-zero if an added pattern ends in this vertex
int depth; // depth of tree (length of the string spelled from the root)
int p; // parent vertex index, -1 for the root
uint8_t pch; // vertex char: character on the edge from the parent
int link; // sufflink: cached suffix link index, -1 while not computed
int16_t go[TRIE_ALPHABET]; // dynamically filled pushes: next vertex per character, -1 while unset
};
/**
 * Growable array of vertexes that makes up one trie.
 * A container with vx == NULL is treated as uninitialized/empty by
 * trie_add_string() and trie_process_str().
 */
struct trie_container {
struct trie_vertex *vx; // heap-allocated vertex array, vx[0] is the root
size_t arrsz; // allocated capacity of vx, in vertexes
size_t sz; // number of vertexes currently in use
};
// Initial capacity (in vertexes) of a freshly initialized trie
#define TRIE_STARTSZ 32
/**
 * Allocates the vertex array and creates the root vertex.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int trie_init(struct trie_container *trie);
/**
 * Frees the vertex array and resets the container fields
 * (vx to NULL, sizes to 0).
 */
void trie_destroy(struct trie_container *trie);
/**
 * Adds a pattern of strlen bytes to the trie.
 * Returns 0 on success; -EINVAL if the trie is NULL or uninitialized
 * or the pattern contains a byte >= TRIE_ALPHABET; a negative error
 * code if a new vertex cannot be allocated.
 */
int trie_add_string(struct trie_container *trie,
const uint8_t *str, size_t strlen);
/**
 * Aligns the pattern to the end: a match is reported only if
 * it ends at the last byte of the searched string.
 */
#define TRIE_OPT_MAP_TO_END (1 << 1)
/**
 * Searches the string for the patterns.
 * flags is TRIE_OPT binary mask with options for search.
 * offset, offlen are destination variables with
 * offset of the given string and length of target;
 * they are written only when a target is found.
 *
 * Bytes >= TRIE_ALPHABET in the string reset the search state.
 * The trie is not const because transitions are computed and
 * cached inside it during the search.
 *
 * returns 1 if target found, 0 otherwise
 * (also 0 for a NULL or uninitialized trie)
 */
int trie_process_str(
struct trie_container *trie,
const uint8_t *str, size_t strlen,
int flags,
size_t *offset, size_t *offlen
);
#endif

View File

@@ -68,6 +68,7 @@ typedef __s16 int_least16_t; /* integer of >= 16 bits */
#define free kfree
#define malloc(size) kmalloc((size), GFP_KERNEL)
#define realloc(pt, size) krealloc((pt), (size), GFP_KERNEL)
#define calloc(n, size) kcalloc((n), (size), GFP_KERNEL)
#define ip6_hdr ipv6hdr