On Fri, Apr 03, 2026 at 06:38:07PM +0200, Laurent Vivier wrote:
The iov_tail passed to csum_iov_tail() may contain padding or trailing data beyond the actual L4 payload. Rather than relying on iov_tail_size() to determine how many bytes to checksum, pass the length explicitly so that only the relevant payload bytes are included in the checksum computation.
Signed-off-by: Laurent Vivier
--- checksum.c | 35 +++++++++++++++++++++-------------- checksum.h | 6 +++--- tap.c | 4 ++-- tcp.c | 9 +++++---- udp.c | 5 +++-- udp_vu.c | 12 +++++++----- 6 files changed, 41 insertions(+), 30 deletions(-) diff --git a/checksum.c b/checksum.c index 828f9ecc9c02..a8cf80ba7470 100644 --- a/checksum.c +++ b/checksum.c @@ -182,21 +182,22 @@ static uint16_t csum(const void *buf, size_t len, uint32_t init) * @saddr: IPv4 source address * @daddr: IPv4 destination address * @data: UDP payload (as IO vector tail) + * @l4len: UDP packet length including header */ void csum_udp4(struct udphdr *udp4hr, struct in_addr saddr, struct in_addr daddr, - struct iov_tail *data) + struct iov_tail *data, size_t l4len)
Passing @data which includes just the UDP payload, but a length which includes the L4 header seems odd, rather than passing just the payload length (@dlen, by convention).
{ /* UDP checksums are optional, so don't bother */ udp4hr->check = 0;
if (UDP4_REAL_CHECKSUMS) { - uint16_t l4len = iov_tail_size(data) + sizeof(struct udphdr); uint32_t psum = proto_ipv4_header_psum(l4len, IPPROTO_UDP, saddr, daddr);
- psum = csum_unfolded(udp4hr, sizeof(struct udphdr), psum); - udp4hr->check = csum_iov_tail(data, psum); + psum = csum_unfolded(udp4hr, sizeof(*udp4hr), psum); + udp4hr->check = csum_iov_tail(data, psum, + l4len - sizeof(*udp4hr));
..especially since what we actually need here is the payload length.
} }
@@ -245,19 +246,19 @@ uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol, * @saddr: Source address * @daddr: Destination address * @data: UDP payload (as IO vector tail) + * @l4len: UDP packet length including header */ void csum_udp6(struct udphdr *udp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, - struct iov_tail *data) + struct iov_tail *data, size_t l4len) { - uint16_t l4len = iov_tail_size(data) + sizeof(struct udphdr); uint32_t psum = proto_ipv6_header_psum(l4len, IPPROTO_UDP, saddr, daddr);
udp6hr->check = 0;
- psum = csum_unfolded(udp6hr, sizeof(struct udphdr), psum); - udp6hr->check = csum_iov_tail(data, psum); + psum = csum_unfolded(udp6hr, sizeof(*udp6hr), psum); + udp6hr->check = csum_iov_tail(data, psum, l4len - sizeof(*udp6hr));
Same comments here.
}
/** @@ -604,20 +605,26 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init) /** * csum_iov_tail() - Calculate unfolded checksum for the tail of an IO vector * @tail: IO vector tail to checksum - * @init Initial 32-bit checksum, 0 for no pre-computed checksum + * @init: Initial 32-bit checksum, 0 for no pre-computed checksum + * @len: Number of bytes to checksum from @tail
I admit this interface is slightly less elegant when it takes an explicit length, but I think it's worth it for less confusion in other places. I have sometimes wondered if it would make sense to replace iov_tail with something that represents a window with both start and end within an existing iovec array, maybe iov_slice? But that's not in scope for this series.
* * Return: 16-bit folded, complemented checksum */ -uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init) +uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init, size_t len) { if (iov_tail_prune(tail)) { - size_t i; + size_t i, n;
+ n = MIN(len, tail->iov[0].iov_len - tail->off); init = csum_unfolded((char *)tail->iov[0].iov_base + tail->off, - tail->iov[0].iov_len - tail->off, init); - for (i = 1; i < tail->cnt; i++) { + n, init); + len -= n; + + for (i = 1; len && i < tail->cnt; i++) { const struct iovec *iov = &tail->iov[i]; - init = csum_unfolded(iov->iov_base, iov->iov_len, init); + n = MIN(len, iov->iov_len); + init = csum_unfolded(iov->iov_base, n, init); + len -= n; } } return (uint16_t)~csum_fold(init); diff --git a/checksum.h b/checksum.h index 4e3b098db072..65834bf9eaaf 100644 --- a/checksum.h +++ b/checksum.h @@ -21,18 +21,18 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol, struct in_addr saddr, struct in_addr daddr); void csum_udp4(struct udphdr *udp4hr, struct in_addr saddr, struct in_addr daddr, - struct iov_tail *data); + struct iov_tail *data, size_t l4len); void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t dlen); uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol, const struct in6_addr *saddr, const struct in6_addr *daddr); void csum_udp6(struct udphdr *udp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, - struct iov_tail *data); + struct iov_tail *data, size_t l4len); void csum_icmp6(struct icmp6hdr *icmp6hr, const struct in6_addr *saddr, const struct in6_addr *daddr, const void *payload, size_t dlen); uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init); -uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init); +uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init, size_t len);
#endif /* CHECKSUM_H */ diff --git a/tap.c b/tap.c index 1049e023bcd2..b61199dd699d 100644 --- a/tap.c +++ b/tap.c @@ -252,7 +252,7 @@ void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport, uh->source = htons(sport); uh->dest = htons(dport); uh->len = htons(l4len); - csum_udp4(uh, src, dst, &payload); + csum_udp4(uh, src, dst, &payload, l4len); return (char *)uh + sizeof(*uh); }
@@ -357,7 +357,7 @@ void *tap_push_uh6(struct udphdr *uh, uh->source = htons(sport); uh->dest = htons(dport); uh->len = htons(l4len); - csum_udp6(uh, src, dst, &payload); + csum_udp6(uh, src, dst, &payload, l4len); return (char *)uh + sizeof(*uh); }
diff --git a/tcp.c b/tcp.c index 8ea9be84a9f3..49c6fb57ce16 100644 --- a/tcp.c +++ b/tcp.c @@ -815,13 +815,14 @@ static void tcp_sock_set_nodelay(int s) * @psum: Unfolded partial checksum of the IPv4 or IPv6 pseudo-header * @th: TCP header (updated) * @payload: TCP payload + * @l4len: TCP packet length, including TCP header */ static void tcp_update_csum(uint32_t psum, struct tcphdr *th, - struct iov_tail *payload) + struct iov_tail *payload, size_t l4len)
Same comments about dlen vs l4len again.
{ th->check = 0; psum = csum_unfolded(th, sizeof(*th), psum); - th->check = csum_iov_tail(payload, psum); + th->check = csum_iov_tail(payload, psum, l4len - sizeof(*th)); }
/** @@ -1019,7 +1020,7 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn, if (no_tcp_csum) th->check = 0; else - tcp_update_csum(psum, th, payload); + tcp_update_csum(psum, th, payload, l4len);
return MAX(l3len + sizeof(struct ethhdr), ETH_ZLEN); } @@ -2196,7 +2197,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af, rsth->ack = 1; }
- tcp_update_csum(psum, rsth, &payload); + tcp_update_csum(psum, rsth, &payload, sizeof(*rsth)); rst_l2len = ((char *)rsth - buf) + sizeof(*rsth); tap_send_single(c, buf, rst_l2len); } diff --git a/udp.c b/udp.c index 1fc5a42c5ca7..e113b26bc726 100644 --- a/udp.c +++ b/udp.c @@ -289,7 +289,7 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp, .iov_len = dlen }; struct iov_tail data = IOV_TAIL(&iov, 1, 0); - csum_udp4(&bp->uh, *src, *dst, &data); + csum_udp4(&bp->uh, *src, *dst, &data, l4len); }
return l4len; @@ -334,7 +334,8 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp, .iov_len = dlen }; struct iov_tail data = IOV_TAIL(&iov, 1, 0); - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data); + csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data, + l4len); }
return l4len; diff --git a/udp_vu.c b/udp_vu.c index 9688fe1fdc5c..5421a7d71a19 100644 --- a/udp_vu.c +++ b/udp_vu.c @@ -147,9 +147,10 @@ static size_t udp_vu_prepare(const struct ctx *c, const struct iovec *iov, * @toside: Address information for one side of the flow * @iov: IO vector for the frame * @cnt: Number of IO vector entries + * @l4len: L4 length */ static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov, - size_t cnt) + size_t cnt, size_t l4len) { const struct in_addr *src4 = inany_v4(&toside->oaddr); const struct in_addr *dst4 = inany_v4(&toside->eaddr); @@ -160,11 +161,12 @@ static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov, if (src4 && dst4) { bp = vu_payloadv4(base); data = IOV_TAIL(iov, cnt, (char *)&bp->data - base); - csum_udp4(&bp->uh, *src4, *dst4, &data); + csum_udp4(&bp->uh, *src4, *dst4, &data, l4len); } else { bp = vu_payloadv6(base); data = IOV_TAIL(iov, cnt, (char *)&bp->data - base); - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data); + csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data, + l4len); } }
@@ -225,9 +227,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) { - udp_vu_prepare(c, iov_vu, toside, dlen); + size_t l4len = udp_vu_prepare(c, iov_vu, toside, dlen); if (*c->pcap) { - udp_vu_csum(toside, iov_vu, iov_cnt); + udp_vu_csum(toside, iov_vu, iov_cnt, l4len); pcap_iov(iov_vu, iov_cnt, VNET_HLEN); } vu_flush(vdev, vq, elem, elem_used); -- 2.53.0
-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson