checksum: add VSX fast path for POWER8/POWER9
Tested with podman on Debian 13 for a while and works ok. It's difficult to run all the tests on POWER but 505-networking-pasta.bats test suite passes.
Hi jfiusdq,
On Thu, 05 Feb 2026 06:14:40 +0000
jfiusdq
Tested with podman on Debian 13 for a while and works ok. It's difficult to run all the tests on POWER but 505-networking-pasta.bats test suite passes.
Thanks for the patch! I'm not really familiar with AltiVec / VSX or POWER at all so it's difficult for me to review this, but we have somebody on the list who should be able to help. :) It might need a bit of time though. Meanwhile, it would be nice if you could send this patch in the usual format, using git send-email and adding a Signed-off-by: tag. This is just the same submission format as the Linux kernel and many other opensource projects, see the archives for examples: https://archives.passt.top/passt-dev/ I understand you might not want to reveal your full name, and that's entirely fine, but still it would be better if you could send the patch in the usual format. We'll accept patches regardless of the submission format though, so that's not a strict requirement, just a nice-to-have. Thanks. -- Stefano
On Thu, 05 Feb 2026 06:14:40 +0000, jfiusdq
Tested with podman on Debian 13 for a while and works ok. It's difficult to run all the tests on POWER but 505-networking-pasta.bats test suite passes. --- checksum.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-)
diff --git a/checksum.c b/checksum.c index 0c3837c..828f9ec 100644 --- a/checksum.c +++ b/checksum.c @@ -281,7 +281,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, icmp6hr->icmp6_cksum = csum(payload, dlen, psum); }
-#ifdef __AVX2__ +#if defined(__AVX2__) #include
/** @@ -479,7 +479,111 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init)
return init; } -#else /* __AVX2__ */ +#elif defined(__POWER9_VECTOR__) || defined(__POWER8_VECTOR__) +#include
+ +/** + * csum_vsx() - Compute 32-bit checksum using VSX SIMD instructions + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit checksum, not complemented, not folded + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +static uint32_t csum_vsx(const void *buf, size_t len, uint32_t init) +{ + const uint8_t *p = buf; + vector unsigned int sum_even = vec_splat_u32(0); + vector unsigned int sum_odd = vec_splat_u32(0); + const vector unsigned short ones = vec_splat_u16(1); + uint64_t sum64 = init; + +#ifdef __POWER9_VECTOR__ + while (len >= 64) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned char v2b = vec_vsx_ld(32, p); + vector unsigned char v3b = vec_vsx_ld(48, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + vector unsigned short v2 = (vector unsigned short)v2b; + vector unsigned short v3 = (vector unsigned short)v3b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, ones)); + sum_even = vec_add(sum_even, vec_mule(v2, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v2, ones)); + sum_even = vec_add(sum_even, vec_mule(v3, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v3, ones)); + + p += 64; + len -= 64; + } +#endif + + while (len >= 32) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = 
vec_add(sum_odd, vec_mulo(v1, ones)); + + p += 32; + len -= 32; + } + + while (len >= 16) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned short v0 = (vector unsigned short)v0b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + + p += 16; + len -= 16; + } + + { + vector unsigned int sum32 = vec_add(sum_even, sum_odd); + uint32_t partial[4] __attribute__((aligned(16))); + + vec_st(sum32, 0, partial); + sum64 += (uint64_t)partial[0] + partial[1] + + partial[2] + partial[3]; + } + + sum64 += sum_16b(p, len); + + sum64 = (sum64 >> 32) + (sum64 & 0xffffffff); + sum64 += sum64 >> 32; + + return (uint32_t)sum64; +} + +/** + * csum_unfolded() - Calculate the unfolded checksum of a data buffer. + * + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit unfolded checksum + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) +{ + return csum_vsx(buf, len, init); +} +#else /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** * csum_unfolded() - Calculate the unfolded checksum of a data buffer. * @@ -495,7 +599,7 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) { return sum_16b(buf, len) + init; } -#endif /* !__AVX2__ */ +#endif /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** * csum_iov_tail() - Calculate unfolded checksum for the tail of an IO vector -- 2.52.0
Reviewed-by: Laurent Vivier
Microbenchmark of the checksum function vs C version at different buffer sizes:
Results (GB/s, higher is better; speedup = VSX / scalar):
64B: VSX 4.61 vs scalar 5.91 -> 0.78x (VSX slower for tiny buffers)
256B: VSX 10.91 vs scalar 7.57 -> 1.44x
1500B: VSX 13.88 vs scalar 6.89 -> 2.02x
16KB: VSX 14.53 vs scalar 6.96 -> 2.09x
64KB: VSX 15.15 vs scalar 6.85 -> 2.21x
On Friday, February 6th, 2026 at 3:17 PM, Laurent Vivier
On Thu, 05 Feb 2026 06:14:40 +0000, jfiusdq jfiusdq@proton.me wrote:
Tested with podman on Debian 13 for a while and works ok. It's difficult to run all the tests on POWER but 505-networking-pasta.bats test suite passes. --- checksum.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-)
diff --git a/checksum.c b/checksum.c index 0c3837c..828f9ec 100644 --- a/checksum.c +++ b/checksum.c @@ -281,7 +281,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, icmp6hr->icmp6_cksum = csum(payload, dlen, psum); }
-#ifdef __AVX2__ +#if defined(__AVX2__) #include
/** @@ -479,7 +479,111 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init)
return init; } -#else /* __AVX2__ */ +#elif defined(__POWER9_VECTOR__) || defined(__POWER8_VECTOR__) +#include
+ +/** + * csum_vsx() - Compute 32-bit checksum using VSX SIMD instructions + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit checksum, not complemented, not folded + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +static uint32_t csum_vsx(const void *buf, size_t len, uint32_t init) +{ + const uint8_t *p = buf; + vector unsigned int sum_even = vec_splat_u32(0); + vector unsigned int sum_odd = vec_splat_u32(0); + const vector unsigned short ones = vec_splat_u16(1); + uint64_t sum64 = init; + +#ifdef __POWER9_VECTOR__ + while (len >= 64) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned char v2b = vec_vsx_ld(32, p); + vector unsigned char v3b = vec_vsx_ld(48, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + vector unsigned short v2 = (vector unsigned short)v2b; + vector unsigned short v3 = (vector unsigned short)v3b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, ones)); + sum_even = vec_add(sum_even, vec_mule(v2, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v2, ones)); + sum_even = vec_add(sum_even, vec_mule(v3, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v3, ones)); + + p += 64; + len -= 64; + } +#endif + + while (len >= 32) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, 
ones)); + + p += 32; + len -= 32; + } + + while (len >= 16) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned short v0 = (vector unsigned short)v0b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + + p += 16; + len -= 16; + } + + { + vector unsigned int sum32 = vec_add(sum_even, sum_odd); + uint32_t partial[4] __attribute__((aligned(16))); + + vec_st(sum32, 0, partial); + sum64 += (uint64_t)partial[0] + partial[1] + + partial[2] + partial[3]; + } + + sum64 += sum_16b(p, len); + + sum64 = (sum64 >> 32) + (sum64 & 0xffffffff); + sum64 += sum64 >> 32; + + return (uint32_t)sum64; +} + +/** + * csum_unfolded() - Calculate the unfolded checksum of a data buffer. + * + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit unfolded checksum + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) +{ + return csum_vsx(buf, len, init); +} +#else /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** * csum_unfolded() - Calculate the unfolded checksum of a data buffer. * @@ -495,7 +599,7 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) { return sum_16b(buf, len) + init; } -#endif /* !__AVX2__ */ +#endif /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */
/** * csum_iov_tail() - Calculate unfolded checksum for the tail of an IO vector -- 2.52.0
Reviewed-by: Laurent Vivier lvivier@redhat.com
Hi, On 2/7/26 23:31, jfiusdq wrote:
Microbenchmark of the checksum function vs C version at different buffer sizes:
Results (GB/s, higher is better; speedup = VSX / scalar):
64B: VSX 4.61 vs scalar 5.91 -> 0.78x (VSX slower for tiny buffers) 256B: VSX 10.91 vs scalar 7.57 -> 1.44x 1500B: VSX 13.88 vs scalar 6.89 -> 2.02x 16KB: VSX 14.53 vs scalar 6.96 -> 2.09x 64KB: VSX 15.15 vs scalar 6.85 -> 2.21x
Could you please share the microbenchmark? Thanks, C.
On Friday, February 6th, 2026 at 3:17 PM, Laurent Vivier
wrote: On Thu, 05 Feb 2026 06:14:40 +0000, jfiusdq jfiusdq@proton.me wrote:
Tested with podman on Debian 13 for a while and works ok. It's difficult to run all the tests on POWER but 505-networking-pasta.bats test suite passes. --- checksum.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-)
diff --git a/checksum.c b/checksum.c index 0c3837c..828f9ec 100644 --- a/checksum.c +++ b/checksum.c @@ -281,7 +281,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, icmp6hr->icmp6_cksum = csum(payload, dlen, psum); }
-#ifdef __AVX2__ +#if defined(__AVX2__) #include
/** @@ -479,7 +479,111 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init)
return init; } -#else /* __AVX2__ */ +#elif defined(__POWER9_VECTOR__) || defined(__POWER8_VECTOR__) +#include
+ +/** + * csum_vsx() - Compute 32-bit checksum using VSX SIMD instructions + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit checksum, not complemented, not folded + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +static uint32_t csum_vsx(const void *buf, size_t len, uint32_t init) +{ + const uint8_t *p = buf; + vector unsigned int sum_even = vec_splat_u32(0); + vector unsigned int sum_odd = vec_splat_u32(0); + const vector unsigned short ones = vec_splat_u16(1); + uint64_t sum64 = init; + +#ifdef __POWER9_VECTOR__ + while (len >= 64) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned char v2b = vec_vsx_ld(32, p); + vector unsigned char v3b = vec_vsx_ld(48, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + vector unsigned short v2 = (vector unsigned short)v2b; + vector unsigned short v3 = (vector unsigned short)v3b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, ones)); + sum_even = vec_add(sum_even, vec_mule(v2, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v2, ones)); + sum_even = vec_add(sum_even, vec_mule(v3, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v3, ones)); + + p += 64; + len -= 64; + } +#endif + + while (len >= 32) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, 
ones)); + + p += 32; + len -= 32; + } + + while (len >= 16) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned short v0 = (vector unsigned short)v0b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + + p += 16; + len -= 16; + } + + { + vector unsigned int sum32 = vec_add(sum_even, sum_odd); + uint32_t partial[4] __attribute__((aligned(16))); + + vec_st(sum32, 0, partial); + sum64 += (uint64_t)partial[0] + partial[1] + + partial[2] + partial[3]; + } + + sum64 += sum_16b(p, len); + + sum64 = (sum64 >> 32) + (sum64 & 0xffffffff); + sum64 += sum64 >> 32; + + return (uint32_t)sum64; +} + +/** + * csum_unfolded() - Calculate the unfolded checksum of a data buffer. + * + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit unfolded checksum + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) +{ + return csum_vsx(buf, len, init); +} +#else /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** * csum_unfolded() - Calculate the unfolded checksum of a data buffer. * @@ -495,7 +599,7 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) { return sum_16b(buf, len) + init; } -#endif /* !__AVX2__ */ +#endif /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** * csum_iov_tail() - Calculate unfolded checksum for the tail of an IO vector -- 2.52.0
Reviewed-by: Laurent Vivier lvivier@redhat.com
// bench_checksum.c
// Run from the source directory:
// gcc -O3 -D_GNU_SOURCE -mcpu=power9 -mvsx -maltivec \
// -I . -o /tmp/bench_vsx bench_checksum.c checksum.c
//
// gcc -O3 -D_GNU_SOURCE -mcpu=power9 -mno-vsx -mno-altivec -fno-tree-vectorize \
// -U__POWER8_VECTOR__ -U__POWER9_VECTOR__ \
// -I . -o /tmp/bench_scalar bench_checksum.c checksum.c
#include
Hi,
On 2/7/26 23:31, jfiusdq wrote:
Microbenchmark of the checksum function vs C version at different buffer sizes:
Results (GB/s, higher is better; speedup = VSX / scalar):
64B: VSX 4.61 vs scalar 5.91 -> 0.78x (VSX slower for tiny buffers) 256B: VSX 10.91 vs scalar 7.57 -> 1.44x 1500B: VSX 13.88 vs scalar 6.89 -> 2.02x 16KB: VSX 14.53 vs scalar 6.96 -> 2.09x 64KB: VSX 15.15 vs scalar 6.85 -> 2.21x
Could you please share the microbenchmark?
Thanks,
C.
On Friday, February 6th, 2026 at 3:17 PM, Laurent Vivier
wrote: On Thu, 05 Feb 2026 06:14:40 +0000, jfiusdq jfiusdq@proton.me wrote:
Tested with podman on Debian 13 for a while and works ok. It's difficult to run all the tests on POWER but 505-networking-pasta.bats test suite passes. --- checksum.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-)
diff --git a/checksum.c b/checksum.c index 0c3837c..828f9ec 100644 --- a/checksum.c +++ b/checksum.c @@ -281,7 +281,7 @@ void csum_icmp6(struct icmp6hdr *icmp6hr, icmp6hr->icmp6_cksum = csum(payload, dlen, psum); }
-#ifdef __AVX2__ +#if defined(__AVX2__) #include
/** @@ -479,7 +479,111 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init)
return init; } -#else /* __AVX2__ */ +#elif defined(__POWER9_VECTOR__) || defined(__POWER8_VECTOR__) +#include
+ +/** + * csum_vsx() - Compute 32-bit checksum using VSX SIMD instructions + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit checksum, not complemented, not folded + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +static uint32_t csum_vsx(const void *buf, size_t len, uint32_t init) +{ + const uint8_t *p = buf; + vector unsigned int sum_even = vec_splat_u32(0); + vector unsigned int sum_odd = vec_splat_u32(0); + const vector unsigned short ones = vec_splat_u16(1); + uint64_t sum64 = init; + +#ifdef __POWER9_VECTOR__ + while (len >= 64) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned char v2b = vec_vsx_ld(32, p); + vector unsigned char v3b = vec_vsx_ld(48, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + vector unsigned short v2 = (vector unsigned short)v2b; + vector unsigned short v3 = (vector unsigned short)v3b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, ones)); + sum_even = vec_add(sum_even, vec_mule(v2, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v2, ones)); + sum_even = vec_add(sum_even, vec_mule(v3, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v3, ones)); + + p += 64; + len -= 64; + } +#endif + + while (len >= 32) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned char v1b = vec_vsx_ld(16, p); + vector unsigned short v0 = (vector unsigned short)v0b; + vector unsigned short v1 = (vector unsigned short)v1b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + sum_even = vec_add(sum_even, vec_mule(v1, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v1, 
ones)); + + p += 32; + len -= 32; + } + + while (len >= 16) { + vector unsigned char v0b = vec_vsx_ld(0, p); + vector unsigned short v0 = (vector unsigned short)v0b; + + sum_even = vec_add(sum_even, vec_mule(v0, ones)); + sum_odd = vec_add(sum_odd, vec_mulo(v0, ones)); + + p += 16; + len -= 16; + } + + { + vector unsigned int sum32 = vec_add(sum_even, sum_odd); + uint32_t partial[4] __attribute__((aligned(16))); + + vec_st(sum32, 0, partial); + sum64 += (uint64_t)partial[0] + partial[1] + + partial[2] + partial[3]; + } + + sum64 += sum_16b(p, len); + + sum64 = (sum64 >> 32) + (sum64 & 0xffffffff); + sum64 += sum64 >> 32; + + return (uint32_t)sum64; +} + +/** + * csum_unfolded() - Calculate the unfolded checksum of a data buffer. + * + * @buf: Input buffer + * @len: Input length + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * + * Return: 32-bit unfolded checksum + */ +/* NOLINTNEXTLINE(clang-diagnostic-unknown-attributes) */ +__attribute__((optimize("-fno-strict-aliasing"))) /* See csum_16b() */ +uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) +{ + return csum_vsx(buf, len, init); +} +#else /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** * csum_unfolded() - Calculate the unfolded checksum of a data buffer. * @@ -495,7 +599,7 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) { return sum_16b(buf, len) + init; } -#endif /* !__AVX2__ */ +#endif /* !__AVX2__ && !__POWER9_VECTOR__ && !__POWER8_VECTOR__ */ /** * csum_iov_tail() - Calculate unfolded checksum for the tail of an IO vector -- 2.52.0
Reviewed-by: Laurent Vivier lvivier@redhat.com
On Thu, 05 Feb 2026 06:14:40 +0000
jfiusdq
Tested with podman on Debian 13 for a while and works ok. It's difficult to run all the tests on POWER but 505-networking-pasta.bats test suite passes.
Applied, thanks for the patch and for following up! As I mentioned, next time it would be nice to have a proper patch submission instead. You can use git send-email with Proton Mail without much trouble, see for example: https://zorleone.xyz/posts/hydroxide/ https://git-scm.com/docs/git-send-email#_sending_patches -- Stefano
participants (4)
-
Cédric Le Goater
-
jfiusdq
-
Laurent Vivier
-
Stefano Brivio