diff --git a/binaries/aarch64/ip2net b/binaries/aarch64/ip2net index 633d34b7..ae734298 100755 Binary files a/binaries/aarch64/ip2net and b/binaries/aarch64/ip2net differ diff --git a/binaries/arm/ip2net b/binaries/arm/ip2net index 87523ed0..34cac9a4 100755 Binary files a/binaries/arm/ip2net and b/binaries/arm/ip2net differ diff --git a/binaries/freebsd-x64/ip2net b/binaries/freebsd-x64/ip2net index 45785964..efc65e7a 100755 Binary files a/binaries/freebsd-x64/ip2net and b/binaries/freebsd-x64/ip2net differ diff --git a/binaries/mips32r1-lsb/ip2net b/binaries/mips32r1-lsb/ip2net index 4d7c29ee..9af01b3c 100755 Binary files a/binaries/mips32r1-lsb/ip2net and b/binaries/mips32r1-lsb/ip2net differ diff --git a/binaries/mips32r1-msb/ip2net b/binaries/mips32r1-msb/ip2net index 94df0eb0..a4481acb 100755 Binary files a/binaries/mips32r1-msb/ip2net and b/binaries/mips32r1-msb/ip2net differ diff --git a/binaries/mips64r2-msb/ip2net b/binaries/mips64r2-msb/ip2net index e86926de..28f63ceb 100755 Binary files a/binaries/mips64r2-msb/ip2net and b/binaries/mips64r2-msb/ip2net differ diff --git a/binaries/ppc/ip2net b/binaries/ppc/ip2net index 7f471bac..7225e3b2 100755 Binary files a/binaries/ppc/ip2net and b/binaries/ppc/ip2net differ diff --git a/binaries/x86/ip2net b/binaries/x86/ip2net index 46cd7efd..19e81fcc 100755 Binary files a/binaries/x86/ip2net and b/binaries/x86/ip2net differ diff --git a/binaries/x86_64/ip2net b/binaries/x86_64/ip2net index 73464a86..de1dfe88 100755 Binary files a/binaries/x86_64/ip2net and b/binaries/x86_64/ip2net differ diff --git a/ip2net/ip2net.c b/ip2net/ip2net.c index dede7e79..60316bda 100644 --- a/ip2net/ip2net.c +++ b/ip2net/ip2net.c @@ -102,7 +102,14 @@ static inline const struct in6_addr *mask_from_bitcount6(uint32_t zct) } -// result = a & b +/* +// this is "correct" solution for strict aliasing feature +// but I don't like this style of coding +// write what I don't mean to force smart optimizer to do what it's best +// it produces 
/*
 * ip6_and - bitwise AND of two IPv6 addresses: result = a & b.
 *
 * a, b   - input addresses; neither is modified unless it is also 'result'.
 * result - receives the 16 ANDed address bytes. It may be the same object
 *          as 'a' or 'b': every byte is read before it is overwritten.
 *
 * The bytes are combined one at a time through s6_addr on purpose.
 * The previous version cast s6_addr to uint64_t / unsigned __int128 pointers,
 * which breaks the C aliasing rules and assumes an alignment that
 * struct in6_addr is not guaranteed to have. It had to be propped up with a
 * gcc-only "no-strict-aliasing" optimize attribute and was still observed to
 * be miscompiled by gcc at -O2/-O3 when inlined.
 * A byte-wise loop needs no casts, attributes or compiler-specific #ifdefs,
 * and the optimizer is free to widen it where the target allows.
 */
static void ip6_and(const struct in6_addr *a, const struct in6_addr *b, struct in6_addr *result)
{
	unsigned int i;

	for (i = 0; i < sizeof(result->s6_addr); i++) {
		result->s6_addr[i] = a->s6_addr[i] & b->s6_addr[i];
	}
}