Laurent recently reworked the TCP buffer handling to be split into various pieces tracked by iovecs. We'll want that for various future changes. This series makes a similar split for UDP buffers, which we'll want in order to allow dual-stack UDP sockets, amongst other things. This is based on my earlier series of cleanups for the TCP buffer handling. David Gibson (7): test: Allow sftp via vsock-ssh in tests udp: Split tap-bound UDP packets into multiple buffers using io vector udp: Combine initialisation of IPv4 and IPv6 iovs udp: Explicitly set checksum in guest-bound UDP headers udp: Share payload buffers between IPv4 and IPv6 udp: Use the same buffer for the L2 header for all frames udp: Single buffer for IPv4, IPv6 headers and metadata tap.h | 38 ------ test/passt.mbuto | 6 +- udp.c | 304 +++++++++++++++++++++++------------------------ 3 files changed, 153 insertions(+), 195 deletions(-) -- 2.44.0
During some debugging recently, I wanted to extact a file from a test guest and found it was tricky, since the ssh-over-vsock setup we had didn't allow sftp/scp. We can fix this by adding a line to the guest side sshd config from mbuto. While we're there correct an inaccurate comment. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- test/passt.mbuto | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/passt.mbuto b/test/passt.mbuto index 6240d5c1..436eecc5 100755 --- a/test/passt.mbuto +++ b/test/passt.mbuto @@ -54,7 +54,7 @@ EOF ln -s /run /var/run :> /etc/fstab - # sshd(dropbear) via vsock + # sshd via vsock cat > /etc/passwd << EOF root:x:0:0:root:/root:/bin/sh sshd:x:100:100:Privilege-separated SSH:/var/empty/sshd:/sbin/nologin @@ -64,7 +64,9 @@ root:::0:99999:7::: EOF chmod 000 /etc/shadow - :> /etc/ssh/sshd_config + cat > /etc/ssh/sshd_config << EOF +Subsystem sftp internal-sftp +EOF ssh-keygen -A chmod 700 /root/.ssh chmod 700 /run/sshd -- 2.44.0
When sending to the tap device, currently we assemble the headers and payload into a single contiguous buffer. Those are described by a single struct iovec, then a batch of frames is sent to the device with tap_send_frames(). In order to better integrate the IPv4 and IPv6 paths, we want the IP header in a different buffer that might not be contiguous with the payload. To prepare for that, split the UDP packet into an iovec of buffers. We use the same split that Laurent recently introduced for TCP for convenience. This removes the last use of tap_hdr_len_(), tap_frame_base() and tap_frame_len(), so remove those too. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tap.h | 38 ------------------------------ udp.c | 74 +++++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 49 insertions(+), 63 deletions(-) diff --git a/tap.h b/tap.h index 75aa3f03..754703d2 100644 --- a/tap.h +++ b/tap.h @@ -43,44 +43,6 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len) thdr->vnet_len = htonl(l2len); } -static inline size_t tap_hdr_len_(const struct ctx *c) -{ - if (c->mode == MODE_PASST) - return sizeof(struct tap_hdr); - else - return 0; -} - -/** - * tap_frame_base() - Find start of tap frame - * @c: Execution context - * @taph: Pointer to tap specific header buffer - * - * Returns: pointer to the start of tap frame - suitable for an - * iov_base to be passed to tap_send_frames()) - */ -static inline void *tap_frame_base(const struct ctx *c, struct tap_hdr *taph) -{ - return (char *)(taph + 1) - tap_hdr_len_(c); -} - -/** - * tap_frame_len() - Finalize tap frame and return total length - * @c: Execution context - * @taph: Tap header to finalize - * @plen: L2 packet length (includes L2, excludes tap specific headers) - * - * Returns: length of the tap frame including tap specific headers - suitable - * for an iov_len to be passed to tap_send_frames() - */ -static inline size_t tap_frame_len(const struct ctx *c, struct tap_hdr *taph, - size_t plen) -{ - if (c->mode == MODE_PASST) - taph->vnet_len = htonl(plen); - return plen + tap_hdr_len_(c); -} - struct in_addr tap_ip4_daddr(const struct ctx *c); void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport, struct in_addr dst, in_port_t dport, diff --git a/udp.c b/udp.c index 545212c5..4650b366 100644 --- a/udp.c +++ b/udp.c @@ -222,12 +222,28 @@ struct udp6_l2_buf_t { #endif udp6_l2_buf[UDP_MAX_FRAMES]; +/** + * enum udp_iov_idx - Indices for the buffers making up a single UDP frame + * @UDP_IOV_TAP TAP specific header + * @UDP_IOV_ETH ethernet header + * @UDP_IOV_IP IP (v4/v6) header + * @UDP_IOV_PAYLOAD IP payload (UDP header + data) + * @UDP_NUM_IOVS the number of entries in the iovec array + */ +enum udp_iov_idx { + UDP_IOV_TAP = 0, + UDP_IOV_ETH = 1, + UDP_IOV_IP = 2, + UDP_IOV_PAYLOAD = 3, + UDP_NUM_IOVS +}; + /* recvmmsg()/sendmmsg() data for tap */ static struct iovec udp4_l2_iov_sock [UDP_MAX_FRAMES]; static struct iovec udp6_l2_iov_sock [UDP_MAX_FRAMES]; -static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES]; -static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES]; +static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS]; +static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS]; static struct mmsghdr udp4_l2_mh_sock [UDP_MAX_FRAMES]; static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES]; @@ -309,7 +325,7 @@ static void udp_sock4_iov_init_one(const struct ctx *c, size_t i) struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr; struct udp4_l2_buf_t *buf = &udp4_l2_buf[i]; struct iovec *siov = &udp4_l2_iov_sock[i]; - struct iovec *tiov = &udp4_l2_iov_tap[i]; + struct iovec *tiov = udp4_l2_iov_tap[i]; *buf = (struct udp4_l2_buf_t) { .eh = ETH_HDR_INIT(ETH_P_IP), @@ -323,7 +339,10 @@ static void udp_sock4_iov_init_one(const struct ctx *c, size_t i) mh->msg_iov = siov; mh->msg_iovlen = 1; - tiov->iov_base = tap_frame_base(c, &buf->taph); + tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); + tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); + tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph); + tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; } /** @@ -336,7 +355,7 @@ static void udp_sock6_iov_init_one(const struct ctx *c, size_t i) struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr; struct udp6_l2_buf_t *buf = &udp6_l2_buf[i]; struct iovec *siov = &udp6_l2_iov_sock[i]; - struct iovec *tiov = &udp6_l2_iov_tap[i]; + struct iovec *tiov = udp6_l2_iov_tap[i]; *buf = (struct udp6_l2_buf_t) { .eh = ETH_HDR_INIT(ETH_P_IPV6), @@ -350,7 +369,10 @@ static void udp_sock6_iov_init_one(const struct ctx *c, size_t i) mh->msg_iov = siov; mh->msg_iovlen = 1; - tiov->iov_base = tap_frame_base(c, &buf->taph); + tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); + tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); + tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h); + tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; } /** @@ -572,13 +594,14 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n, * @plen: Length of UDP payload * @now: Current timestamp * - * Return: size of tap frame with headers + * Return: size of IPv4 payload (UDP header + data) */ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, in_port_t dstport, size_t plen, const struct timespec *now) { - size_t l3len = plen + sizeof(b->iph) + sizeof(b->uh); + size_t l4len = plen + sizeof(b->uh); + size_t l3len = l4len + sizeof(b->iph); in_port_t srcport = ntohs(b->s_in.sin_port); struct in_addr src = b->s_in.sin_addr; @@ -609,9 +632,10 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, b->uh.source = b->s_in.sin_port; b->uh.dest = htons(dstport); - b->uh.len = htons(plen + sizeof(b->uh)); + b->uh.len = htons(l4len); - return tap_frame_len(c, &b->taph, l3len + sizeof(b->eh)); + tap_hdr_update(&b->taph, l3len + sizeof(b->eh)); + return l4len; } /** @@ -622,7 +646,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, * @plen: Length of UDP payload * @now: Current timestamp * - * Return: size of tap frame with headers + * Return: size of IPv6 payload (UDP header + data) */ static size_t udp_update_hdr6(const struct ctx *c, struct udp6_l2_buf_t *b, in_port_t dstport, size_t plen, @@ -679,8 +703,8 @@ static size_t udp_update_hdr6(const struct ctx *c, struct udp6_l2_buf_t *b, b->uh.len = b->ip6h.payload_len; csum_udp6(&b->uh, src, dst, b->data, plen); - return tap_frame_len(c, &b->taph, l4len + sizeof(b->ip6h) - + sizeof(b->eh)); + tap_hdr_update(&b->taph, l4len + sizeof(b->ip6h) + sizeof(b->eh)); + return l4len; } /** @@ -698,8 +722,8 @@ static void udp_tap_send(const struct ctx *c, unsigned int start, unsigned int n, in_port_t dstport, bool v6, const struct timespec *now) { - struct iovec *tap_iov; - unsigned int i; + struct iovec (*tap_iov)[UDP_NUM_IOVS]; + size_t i; if (v6) tap_iov = udp6_l2_iov_tap; @@ -707,19 +731,19 @@ static void udp_tap_send(const struct ctx *c, tap_iov = udp4_l2_iov_tap; for (i = start; i < start + n; i++) { - size_t buf_len; - - if (v6) - buf_len = udp_update_hdr6(c, &udp6_l2_buf[i], dstport, - udp6_l2_mh_sock[i].msg_len, now); - else - buf_len = udp_update_hdr4(c, &udp4_l2_buf[i], dstport, - udp4_l2_mh_sock[i].msg_len, now); + size_t l4len; - tap_iov[i].iov_len = buf_len; + if (v6) { + l4len = udp_update_hdr6(c, &udp6_l2_buf[i], dstport, + udp6_l2_mh_sock[i].msg_len, now); + } else { + l4len = udp_update_hdr4(c, &udp4_l2_buf[i], dstport, + udp4_l2_mh_sock[i].msg_len, now); + } + tap_iov[i][UDP_IOV_PAYLOAD].iov_len = l4len; } - tap_send_frames(c, tap_iov + start, 1, n); + tap_send_frames(c, &tap_iov[start][0], UDP_NUM_IOVS, n); } /** -- 2.44.0
On Tue, 30 Apr 2024 20:05:36 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:When sending to the tap device, currently we assemble the headers and payload into a single contiguous buffer. Those are described by a single struct iovec, then a batch of frames is sent to the device with tap_send_frames(). In order to better integrate the IPv4 and IPv6 paths, we want the IP header in a different buffer that might not be contiguous with the payload. To prepare for that, split the UDP packet into an iovec of buffers. We use the same split that Laurent recently introduced for TCP for convenience. This removes the last use of tap_hdr_len_(), tap_frame_base() and tap_frame_len(), so remove those too. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tap.h | 38 ------------------------------ udp.c | 74 +++++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 49 insertions(+), 63 deletions(-) diff --git a/tap.h b/tap.h index 75aa3f03..754703d2 100644 --- a/tap.h +++ b/tap.h @@ -43,44 +43,6 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len) thdr->vnet_len = htonl(l2len); } -static inline size_t tap_hdr_len_(const struct ctx *c) -{ - if (c->mode == MODE_PASST) - return sizeof(struct tap_hdr); - else - return 0; -} - -/** - * tap_frame_base() - Find start of tap frame - * @c: Execution context - * @taph: Pointer to tap specific header buffer - * - * Returns: pointer to the start of tap frame - suitable for an - * iov_base to be passed to tap_send_frames()) - */ -static inline void *tap_frame_base(const struct ctx *c, struct tap_hdr *taph) -{ - return (char *)(taph + 1) - tap_hdr_len_(c); -} - -/** - * tap_frame_len() - Finalize tap frame and return total length - * @c: Execution context - * @taph: Tap header to finalize - * @plen: L2 packet length (includes L2, excludes tap specific headers) - * - * Returns: length of the tap frame including tap specific headers - suitable - * for an iov_len to be passed to tap_send_frames() - */ -static inline size_t tap_frame_len(const struct ctx *c, struct tap_hdr *taph, - size_t plen) -{ - if (c->mode == MODE_PASST) - taph->vnet_len = htonl(plen); - return plen + tap_hdr_len_(c); -} - struct in_addr tap_ip4_daddr(const struct ctx *c); void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport, struct in_addr dst, in_port_t dport, diff --git a/udp.c b/udp.c index 545212c5..4650b366 100644 --- a/udp.c +++ b/udp.c @@ -222,12 +222,28 @@ struct udp6_l2_buf_t { #endif udp6_l2_buf[UDP_MAX_FRAMES]; +/** + * enum udp_iov_idx - Indices for the buffers making up a single UDP frame + * @UDP_IOV_TAP TAP specific headerNits: s/TAP/tap/ and+ * @UDP_IOV_ETH ethernet headers/ethernet/Ethernet. -- Stefano
On Tue, Apr 30, 2024 at 10:15:34PM +0200, Stefano Brivio wrote:On Tue, 30 Apr 2024 20:05:36 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:Amended, thanks. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibsonWhen sending to the tap device, currently we assemble the headers and payload into a single contiguous buffer. Those are described by a single struct iovec, then a batch of frames is sent to the device with tap_send_frames(). In order to better integrate the IPv4 and IPv6 paths, we want the IP header in a different buffer that might not be contiguous with the payload. To prepare for that, split the UDP packet into an iovec of buffers. We use the same split that Laurent recently introduced for TCP for convenience. This removes the last use of tap_hdr_len_(), tap_frame_base() and tap_frame_len(), so remove those too. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tap.h | 38 ------------------------------ udp.c | 74 +++++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 49 insertions(+), 63 deletions(-) diff --git a/tap.h b/tap.h index 75aa3f03..754703d2 100644 --- a/tap.h +++ b/tap.h @@ -43,44 +43,6 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len) thdr->vnet_len = htonl(l2len); } -static inline size_t tap_hdr_len_(const struct ctx *c) -{ - if (c->mode == MODE_PASST) - return sizeof(struct tap_hdr); - else - return 0; -} - -/** - * tap_frame_base() - Find start of tap frame - * @c: Execution context - * @taph: Pointer to tap specific header buffer - * - * Returns: pointer to the start of tap frame - suitable for an - * iov_base to be passed to tap_send_frames()) - */ -static inline void *tap_frame_base(const struct ctx *c, struct tap_hdr *taph) -{ - return (char *)(taph + 1) - tap_hdr_len_(c); -} - -/** - * tap_frame_len() - Finalize tap frame and return total length - * @c: Execution context - * @taph: Tap header to finalize - * @plen: L2 packet length (includes L2, excludes tap specific headers) - * - * Returns: length of the tap frame including tap specific headers - suitable - * for an iov_len to be passed to tap_send_frames() - */ -static inline size_t tap_frame_len(const struct ctx *c, struct tap_hdr *taph, - size_t plen) -{ - if (c->mode == MODE_PASST) - taph->vnet_len = htonl(plen); - return plen + tap_hdr_len_(c); -} - struct in_addr tap_ip4_daddr(const struct ctx *c); void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport, struct in_addr dst, in_port_t dport, diff --git a/udp.c b/udp.c index 545212c5..4650b366 100644 --- a/udp.c +++ b/udp.c @@ -222,12 +222,28 @@ struct udp6_l2_buf_t { #endif udp6_l2_buf[UDP_MAX_FRAMES]; +/** + * enum udp_iov_idx - Indices for the buffers making up a single UDP frame + * @UDP_IOV_TAP TAP specific headerNits: s/TAP/tap/ and+ * @UDP_IOV_ETH ethernet headers/ethernet/Ethernet.
We're going to introduce more sharing between the IPv4 and IPv6 buffer structures. Prepare for this by combinng the initialisation functions. While we're at it remove the misleading "sock" from the name since these initialise both tap side and sock side structures. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 114 +++++++++++++++++++++++++++------------------------------- 1 file changed, 53 insertions(+), 61 deletions(-) diff --git a/udp.c b/udp.c index 4650b366..269a90e6 100644 --- a/udp.c +++ b/udp.c @@ -316,79 +316,71 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) } /** - * udp_sock4_iov_init_one() - Initialise a scatter-gather L2 buffer for IPv4 + * udp_iov_init_one() - Initialise scatter-gather lists for one buffer * @c: Execution context * @i: Index of buffer to initialize */ -static void udp_sock4_iov_init_one(const struct ctx *c, size_t i) +static void udp_iov_init_one(const struct ctx *c, size_t i) { - struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr; - struct udp4_l2_buf_t *buf = &udp4_l2_buf[i]; - struct iovec *siov = &udp4_l2_iov_sock[i]; - struct iovec *tiov = udp4_l2_iov_tap[i]; - - *buf = (struct udp4_l2_buf_t) { - .eh = ETH_HDR_INIT(ETH_P_IP), - .iph = L2_BUF_IP4_INIT(IPPROTO_UDP) - }; - - *siov = IOV_OF_LVALUE(buf->data); - - mh->msg_name = &buf->s_in; - mh->msg_namelen = sizeof(buf->s_in); - mh->msg_iov = siov; - mh->msg_iovlen = 1; - - tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); - tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); - tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph); - tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; -} + if (c->ifi4) { + struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr; + struct udp4_l2_buf_t *buf = &udp4_l2_buf[i]; + struct iovec *siov = &udp4_l2_iov_sock[i]; + struct iovec *tiov = udp4_l2_iov_tap[i]; + + *buf = (struct udp4_l2_buf_t) { + .eh = ETH_HDR_INIT(ETH_P_IP), + .iph = L2_BUF_IP4_INIT(IPPROTO_UDP) + }; -/** - * udp_sock6_iov_init_one() - Initialise a scatter-gather L2 buffer for IPv6 - * @c: Execution context - * @i: Index of buffer to initialize - */ -static void udp_sock6_iov_init_one(const struct ctx *c, size_t i) -{ - struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr; - struct udp6_l2_buf_t *buf = &udp6_l2_buf[i]; - struct iovec *siov = &udp6_l2_iov_sock[i]; - struct iovec *tiov = udp6_l2_iov_tap[i]; - - *buf = (struct udp6_l2_buf_t) { - .eh = ETH_HDR_INIT(ETH_P_IPV6), - .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP) - }; - - *siov = IOV_OF_LVALUE(buf->data); - - mh->msg_name = &buf->s_in6; - mh->msg_namelen = sizeof(buf->s_in6); - mh->msg_iov = siov; - mh->msg_iovlen = 1; - - tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); - tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); - tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h); - tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + *siov = IOV_OF_LVALUE(buf->data); + + mh->msg_name = &buf->s_in; + mh->msg_namelen = sizeof(buf->s_in); + mh->msg_iov = siov; + mh->msg_iovlen = 1; + + tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); + tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); + tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph); + tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + } + + if (c->ifi6) { + struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr; + struct udp6_l2_buf_t *buf = &udp6_l2_buf[i]; + struct iovec *siov = &udp6_l2_iov_sock[i]; + struct iovec *tiov = udp6_l2_iov_tap[i]; + + *buf = (struct udp6_l2_buf_t) { + .eh = ETH_HDR_INIT(ETH_P_IPV6), + .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP) + }; + + *siov = IOV_OF_LVALUE(buf->data); + + mh->msg_name = &buf->s_in6; + mh->msg_namelen = sizeof(buf->s_in6); + mh->msg_iov = siov; + mh->msg_iovlen = 1; + + tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); + tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); + tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h); + tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + } } /** - * udp_sock_iov_init() - Initialise scatter-gather L2 buffers + * udp_iov_init() - Initialise scatter-gather L2 buffers * @c: Execution context */ -static void udp_sock_iov_init(const struct ctx *c) +static void udp_iov_init(const struct ctx *c) { size_t i; - for (i = 0; i < UDP_MAX_FRAMES; i++) { - if (c->ifi4) - udp_sock4_iov_init_one(c, i); - if (c->ifi6) - udp_sock6_iov_init_one(c, i); - } + for (i = 0; i < UDP_MAX_FRAMES; i++) + udp_iov_init_one(c, i); } /** @@ -1259,7 +1251,7 @@ v6: */ int udp_init(struct ctx *c) { - udp_sock_iov_init(c); + udp_iov_init(c); udp_invert_portmap(&c->udp.fwd_in); udp_invert_portmap(&c->udp.fwd_out); -- 2.44.0
For IPv4, UDP checksums are optional and can just be set to 0. udp_update_hdr4() ignores the checksum field entirely. Since these are set to 0 during startup, this works as intended for now. However, we'd like to share payload and UDP header buffers betweem IPv4 and IPv6, which does calculate UDP checksums. Therefore, for robustness, we should explicitly set the checksum field to 0 for guest-bound UDP packets. In the tap_udp4_send() slow path, however, we do allow IPv4 UDP checksums to be calculated as a compile time option. For consistency, use the same thing in the udp_update_hdr4() path, which will typically initialize to 0, but calculate a real checksum if configured to do so. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/udp.c b/udp.c index 269a90e6..86d0419f 100644 --- a/udp.c +++ b/udp.c @@ -592,6 +592,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, in_port_t dstport, size_t plen, const struct timespec *now) { + const struct in_addr dst = c->ip4.addr_seen; size_t l4len = plen + sizeof(b->uh); size_t l3len = l4len + sizeof(b->iph); in_port_t srcport = ntohs(b->s_in.sin_port); @@ -617,7 +618,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, } b->iph.tot_len = htons(l3len); - b->iph.daddr = c->ip4.addr_seen.s_addr; + b->iph.daddr = dst.s_addr; b->iph.saddr = src.s_addr; b->iph.check = csum_ip4_header(b->iph.tot_len, IPPROTO_UDP, src, c->ip4.addr_seen); @@ -625,6 +626,7 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, b->uh.source = b->s_in.sin_port; b->uh.dest = htons(dstport); b->uh.len = htons(l4len); + csum_udp4(&b->uh, src, dst, b->data, plen); tap_hdr_update(&b->taph, l3len + sizeof(b->eh)); return l4len; -- 2.44.0
Currently the IPv4 and IPv6 paths unnecessarily use different buffers for the UDP payload. Now that we're handling the various pieces of the UDP packets with an iov, we can split the payload part of the buffers off into its own array shared between IPv4 and IPv6. As well as saving a little memory, this allows the payload buffers to be neatly page aligned. With the buffers merged, udp[46]_l2_iov_sock contain exactly the same thing as each other and can also be merged. Likewise udp[46]_iov_splice can be merged together. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 127 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/udp.c b/udp.c index 86d0419f..8c726056 100644 --- a/udp.c +++ b/udp.c @@ -171,14 +171,22 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8) /* Static buffers */ +static struct udp_payload { + struct udphdr uh; + char data[USHRT_MAX - sizeof(struct udphdr)]; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))) +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) +#endif +udp_payload_buf[UDP_MAX_FRAMES]; + /** * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections * @s_in: Source socket address, filled in by recvmmsg() * @taph: Tap backend specific header * @eh: Prefilled ethernet header * @iph: Pre-filled IP header (except for tot_len and saddr) - * @uh: Headroom for UDP header - * @data: Storage for UDP payload */ static struct udp4_l2_buf_t { struct sockaddr_in s_in; @@ -186,9 +194,6 @@ static struct udp4_l2_buf_t { struct tap_hdr taph; struct ethhdr eh; struct iphdr iph; - struct udphdr uh; - uint8_t data[USHRT_MAX - - (sizeof(struct iphdr) + sizeof(struct udphdr))]; } __attribute__ ((packed, aligned(__alignof__(unsigned int)))) udp4_l2_buf[UDP_MAX_FRAMES]; @@ -198,8 +203,6 @@ udp4_l2_buf[UDP_MAX_FRAMES]; * @taph: Tap backend specific header * @eh: Pre-filled ethernet header * @ip6h: Pre-filled IP header (except for payload_len and addresses) - * @uh: Headroom for UDP header - * @data: Storage for UDP payload */ struct udp6_l2_buf_t { struct sockaddr_in6 s_in6; @@ -212,9 +215,6 @@ struct udp6_l2_buf_t { struct tap_hdr taph; struct ethhdr eh; struct ipv6hdr ip6h; - struct udphdr uh; - uint8_t data[USHRT_MAX - - (sizeof(struct ipv6hdr) + sizeof(struct udphdr))]; #ifdef __AVX2__ } __attribute__ ((packed, aligned(32))) #else @@ -239,8 +239,7 @@ enum udp_iov_idx { }; /* recvmmsg()/sendmmsg() data for tap */ -static struct iovec udp4_l2_iov_sock [UDP_MAX_FRAMES]; -static struct iovec udp6_l2_iov_sock [UDP_MAX_FRAMES]; +static struct iovec udp_l2_iov_sock [UDP_MAX_FRAMES]; static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS]; static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS]; @@ -249,8 +248,7 @@ static struct mmsghdr udp4_l2_mh_sock [UDP_MAX_FRAMES]; static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES]; /* recvmmsg()/sendmmsg() data for "spliced" connections */ -static struct iovec udp4_iov_splice [UDP_MAX_FRAMES]; -static struct iovec udp6_iov_splice [UDP_MAX_FRAMES]; +static struct iovec udp_iov_splice [UDP_MAX_FRAMES]; static struct sockaddr_in udp4_localname = { .sin_family = AF_INET, @@ -261,8 +259,8 @@ static struct sockaddr_in6 udp6_localname = { .sin6_addr = IN6ADDR_LOOPBACK_INIT, }; -static struct mmsghdr udp4_mh_splice [UDP_MAX_FRAMES]; -static struct mmsghdr udp6_mh_splice [UDP_MAX_FRAMES]; +static struct mmsghdr udp4_mh_splice [UDP_MAX_FRAMES]; +static struct mmsghdr udp6_mh_splice [UDP_MAX_FRAMES]; /** * udp_portmap_clear() - Clear UDP port map before configuration @@ -322,10 +320,14 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) */ static void udp_iov_init_one(const struct ctx *c, size_t i) { + struct udp_payload *payload = &udp_payload_buf[i]; + struct iovec *siov = &udp_l2_iov_sock[i]; + + *siov = IOV_OF_LVALUE(payload->data); + if (c->ifi4) { struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr; struct udp4_l2_buf_t *buf = &udp4_l2_buf[i]; - struct iovec *siov = &udp4_l2_iov_sock[i]; struct iovec *tiov = udp4_l2_iov_tap[i]; *buf = (struct udp4_l2_buf_t) { @@ -333,8 +335,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) .iph = L2_BUF_IP4_INIT(IPPROTO_UDP) }; - *siov = IOV_OF_LVALUE(buf->data); - mh->msg_name = &buf->s_in; mh->msg_namelen = sizeof(buf->s_in); mh->msg_iov = siov; @@ -343,13 +343,12 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph); - tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + tiov[UDP_IOV_PAYLOAD].iov_base = payload; } if (c->ifi6) { struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr; struct udp6_l2_buf_t *buf = &udp6_l2_buf[i]; - struct iovec *siov = &udp6_l2_iov_sock[i]; struct iovec *tiov = udp6_l2_iov_tap[i]; *buf = (struct udp6_l2_buf_t) { @@ -357,8 +356,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP) }; - *siov = IOV_OF_LVALUE(buf->data); - mh->msg_name = &buf->s_in6; mh->msg_namelen = sizeof(buf->s_in6); mh->msg_iov = siov; @@ -367,7 +364,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h); - tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + tiov[UDP_IOV_PAYLOAD].iov_base = payload; } } @@ -581,22 +578,24 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n, /** * udp_update_hdr4() - Update headers for one IPv4 datagram * @c: Execution context - * @b: Pointer to udp4_l2_buf to update + * @bh: Pointer to udp4_l2_buf to update + * @bp: Pointer to udp_payload to update * @dstport: Destination port number * @plen: Length of UDP payload * @now: Current timestamp * * Return: size of IPv4 payload (UDP header + data) */ -static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, +static size_t udp_update_hdr4(const struct ctx *c, + struct udp4_l2_buf_t *bh, struct udp_payload *bp, in_port_t dstport, size_t plen, const struct timespec *now) { + in_port_t srcport = ntohs(bh->s_in.sin_port); const struct in_addr dst = c->ip4.addr_seen; - size_t l4len = plen + sizeof(b->uh); - size_t l3len = l4len + sizeof(b->iph); - in_port_t srcport = ntohs(b->s_in.sin_port); - struct in_addr src = b->s_in.sin_addr; + struct in_addr src = bh->s_in.sin_addr; + size_t l4len = plen + sizeof(bp->uh); + size_t l3len = l4len + sizeof(bh->iph); if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) && IN4_ARE_ADDR_EQUAL(&src, &c->ip4.dns_host) && srcport == 53 && @@ -617,39 +616,41 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, src = c->ip4.gw; } - b->iph.tot_len = htons(l3len); - b->iph.daddr = dst.s_addr; - b->iph.saddr = src.s_addr; - b->iph.check = csum_ip4_header(b->iph.tot_len, IPPROTO_UDP, - src, c->ip4.addr_seen); + bh->iph.tot_len = htons(l3len); + bh->iph.daddr = c->ip4.addr_seen.s_addr; + bh->iph.saddr = src.s_addr; + bh->iph.check = csum_ip4_header(bh->iph.tot_len, IPPROTO_UDP, + src, c->ip4.addr_seen); - b->uh.source = b->s_in.sin_port; - b->uh.dest = htons(dstport); - b->uh.len = htons(l4len); - csum_udp4(&b->uh, src, dst, b->data, plen); + bp->uh.source = bh->s_in.sin_port; + bp->uh.dest = htons(dstport); + bp->uh.len = htons(l4len); + csum_udp4(&bp->uh, src, dst, bp->data, plen); - tap_hdr_update(&b->taph, l3len + sizeof(b->eh)); + tap_hdr_update(&bh->taph, l3len + sizeof(bh->eh)); return l4len; } /** * udp_update_hdr6() - Update headers for one IPv6 datagram * @c: Execution context - * @b: Pointer to udp6_l2_buf to update + * @bh: Pointer to udp6_l2_buf to update + * @bp: Pointer to udp_payload to update * @dstport: Destination port number * @plen: Length of UDP payload * @now: Current timestamp * * Return: size of IPv6 payload (UDP header + data) */ -static size_t udp_update_hdr6(const struct ctx *c, struct udp6_l2_buf_t *b, +static size_t udp_update_hdr6(const struct ctx *c, + struct udp6_l2_buf_t *bh, struct udp_payload *bp, in_port_t dstport, size_t plen, const struct timespec *now) { - const struct in6_addr *src = &b->s_in6.sin6_addr; + const struct in6_addr *src = &bh->s_in6.sin6_addr; const struct in6_addr *dst = &c->ip6.addr_seen; - in_port_t srcport = ntohs(b->s_in6.sin6_port); - uint16_t l4len = plen + sizeof(b->uh); + in_port_t srcport = ntohs(bh->s_in6.sin6_port); + uint16_t l4len = plen + sizeof(bp->uh); if (IN6_IS_ADDR_LINKLOCAL(src)) { dst = &c->ip6.addr_ll_seen; @@ -685,19 +686,19 @@ static size_t udp_update_hdr6(const struct ctx *c, struct udp6_l2_buf_t *b, } - b->ip6h.payload_len = htons(l4len); - b->ip6h.daddr = *dst; - b->ip6h.saddr = *src; - b->ip6h.version = 6; - b->ip6h.nexthdr = IPPROTO_UDP; - b->ip6h.hop_limit = 255; + bh->ip6h.payload_len = htons(l4len); + bh->ip6h.daddr = *dst; + bh->ip6h.saddr = *src; + bh->ip6h.version = 6; + bh->ip6h.nexthdr = IPPROTO_UDP; + bh->ip6h.hop_limit = 255; - b->uh.source = b->s_in6.sin6_port; - b->uh.dest = htons(dstport); - b->uh.len = b->ip6h.payload_len; - csum_udp6(&b->uh, src, dst, b->data, plen); + bp->uh.source = bh->s_in6.sin6_port; + bp->uh.dest = htons(dstport); + bp->uh.len = bh->ip6h.payload_len; + csum_udp6(&bp->uh, src, dst, bp->data, plen); - tap_hdr_update(&b->taph, l4len + sizeof(b->ip6h) + sizeof(b->eh)); + tap_hdr_update(&bh->taph, l4len + sizeof(bh->ip6h) + sizeof(bh->eh)); return l4len; } @@ -725,13 +726,16 @@ static void udp_tap_send(const struct ctx *c, tap_iov = udp4_l2_iov_tap; for (i = start; i < start + n; i++) { + struct udp_payload *bp = &udp_payload_buf[i]; size_t l4len; if (v6) { - l4len = udp_update_hdr6(c, &udp6_l2_buf[i], dstport, + l4len = udp_update_hdr6(c, &udp6_l2_buf[i], bp, + dstport, udp6_l2_mh_sock[i].msg_len, now); } else { - l4len = udp_update_hdr4(c, &udp4_l2_buf[i], dstport, + l4len = udp_update_hdr4(c, &udp4_l2_buf[i], bp, + dstport, udp4_l2_mh_sock[i].msg_len, now); } tap_iov[i][UDP_IOV_PAYLOAD].iov_len = l4len; @@ -1078,11 +1082,10 @@ static void udp_splice_iov_init(void) mh6->msg_name = &udp6_localname; mh6->msg_namelen = sizeof(udp6_localname); - udp4_iov_splice[i].iov_base = udp4_l2_buf[i].data; - udp6_iov_splice[i].iov_base = udp6_l2_buf[i].data; + udp_iov_splice[i].iov_base = udp_payload_buf[i].data; - mh4->msg_iov = &udp4_iov_splice[i]; - mh6->msg_iov = &udp6_iov_splice[i]; + mh4->msg_iov = &udp_iov_splice[i]; + mh6->msg_iov = &udp_iov_splice[i]; mh4->msg_iovlen = mh6->msg_iovlen = 1; } } -- 2.44.0
On Tue, 30 Apr 2024 20:05:39 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:Currently the IPv4 and IPv6 paths unnecessarily use different buffers for the UDP payload. Now that we're handling the various pieces of the UDP packets with an iov, we can split the payload part of the buffers off into its own array shared between IPv4 and IPv6. As well as saving a little memory, this allows the payload buffers to be neatly page aligned. With the buffers merged, udp[46]_l2_iov_sock contain exactly the same thing as each other and can also be merged. Likewise udp[46]_iov_splice can be merged together. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 127 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/udp.c b/udp.c index 86d0419f..8c726056 100644 --- a/udp.c +++ b/udp.c @@ -171,14 +171,22 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8) /* Static buffers */ +static struct udp_payload {As we're defining a new struct, the usual comment format would be nice, and I would also keep it symmetric with tcp_payload_t (udp_payload_t), say: /** * struct udp_payload_t - UDP header and data for inbound messages * @uh: UDP header * @data: UDP data */+ struct udphdr uh; + char data[USHRT_MAX - sizeof(struct udphdr)]; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))) +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) +#endif +udp_payload_buf[UDP_MAX_FRAMES];...and this could be 'udp_payload'.+ /** * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections * @s_in: Source socket address, filled in by recvmmsg() * @taph: Tap backend specific header * @eh: Prefilled ethernet header * @iph: Pre-filled IP header (except for tot_len and saddr) - * @uh: Headroom for UDP header - * @data: Storage for UDP payload */ static struct udp4_l2_buf_t { struct sockaddr_in s_in; @@ -186,9 +194,6 @@ static struct udp4_l2_buf_t { struct tap_hdr taph; struct ethhdr eh; struct iphdr iph; - struct udphdr uh; - uint8_t data[USHRT_MAX - - (sizeof(struct iphdr) + sizeof(struct udphdr))]; } __attribute__ ((packed, aligned(__alignof__(unsigned int)))) udp4_l2_buf[UDP_MAX_FRAMES]; @@ -198,8 +203,6 @@ udp4_l2_buf[UDP_MAX_FRAMES]; * @taph: Tap backend specific header * @eh: Pre-filled ethernet header * @ip6h: Pre-filled IP header (except for payload_len and addresses) - * @uh: Headroom for UDP header - * @data: Storage for UDP payload */ struct udp6_l2_buf_t { struct sockaddr_in6 s_in6; @@ -212,9 +215,6 @@ struct udp6_l2_buf_t { struct tap_hdr taph; struct ethhdr eh; struct ipv6hdr ip6h; - struct udphdr uh; - uint8_t data[USHRT_MAX - - (sizeof(struct ipv6hdr) + sizeof(struct udphdr))]; #ifdef __AVX2__ } __attribute__ ((packed, aligned(32))) #else @@ -239,8 +239,7 @@ enum udp_iov_idx { }; /* recvmmsg()/sendmmsg() data for tap */ -static struct iovec udp4_l2_iov_sock [UDP_MAX_FRAMES]; -static struct iovec udp6_l2_iov_sock [UDP_MAX_FRAMES]; +static struct iovec udp_l2_iov_sock [UDP_MAX_FRAMES]; static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS]; static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS]; @@ -249,8 +248,7 @@ static struct mmsghdr udp4_l2_mh_sock [UDP_MAX_FRAMES]; static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES]; /* recvmmsg()/sendmmsg() data for "spliced" connections */ -static struct iovec udp4_iov_splice [UDP_MAX_FRAMES]; -static struct iovec udp6_iov_splice [UDP_MAX_FRAMES]; +static struct iovec udp_iov_splice [UDP_MAX_FRAMES]; static struct sockaddr_in udp4_localname = { .sin_family = AF_INET, @@ -261,8 +259,8 @@ static struct sockaddr_in6 udp6_localname = { .sin6_addr = IN6ADDR_LOOPBACK_INIT, }; -static struct mmsghdr udp4_mh_splice [UDP_MAX_FRAMES]; -static struct mmsghdr udp6_mh_splice [UDP_MAX_FRAMES]; +static struct mmsghdr udp4_mh_splice [UDP_MAX_FRAMES]; +static struct mmsghdr udp6_mh_splice [UDP_MAX_FRAMES]; /** * udp_portmap_clear() - Clear UDP port map before configuration @@ -322,10 +320,14 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) */ static void udp_iov_init_one(const struct ctx *c, size_t i) { + struct udp_payload *payload = &udp_payload_buf[i]; + struct iovec *siov = &udp_l2_iov_sock[i]; + + *siov = IOV_OF_LVALUE(payload->data); + if (c->ifi4) { struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr; struct udp4_l2_buf_t *buf = &udp4_l2_buf[i]; - struct iovec *siov = &udp4_l2_iov_sock[i]; struct iovec *tiov = udp4_l2_iov_tap[i]; *buf = (struct udp4_l2_buf_t) { @@ -333,8 +335,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) .iph = L2_BUF_IP4_INIT(IPPROTO_UDP) }; - *siov = IOV_OF_LVALUE(buf->data); - mh->msg_name = &buf->s_in; mh->msg_namelen = sizeof(buf->s_in); mh->msg_iov = siov; @@ -343,13 +343,12 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph); - tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + tiov[UDP_IOV_PAYLOAD].iov_base = payload; } if (c->ifi6) { struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr; struct udp6_l2_buf_t *buf = &udp6_l2_buf[i]; - struct iovec *siov = &udp6_l2_iov_sock[i]; struct iovec *tiov = udp6_l2_iov_tap[i]; *buf = (struct udp6_l2_buf_t) { @@ -357,8 +356,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP) }; - *siov = IOV_OF_LVALUE(buf->data); - mh->msg_name = &buf->s_in6; mh->msg_namelen = sizeof(buf->s_in6); mh->msg_iov = siov; @@ -367,7 +364,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h); - tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + tiov[UDP_IOV_PAYLOAD].iov_base = payload; } } @@ -581,22 +578,24 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n, /** * udp_update_hdr4() - Update headers for one IPv4 datagram * @c: Execution context - * @b: Pointer to udp4_l2_buf to update + * @bh: Pointer to udp4_l2_buf to update + * @bp: Pointer to udp_payload to update * @dstport: Destination port number * @plen: Length of UDP payload * @now: Current timestamp * * Return: size of IPv4 payload (UDP header + data) */ -static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, +static size_t udp_update_hdr4(const struct ctx *c, + struct udp4_l2_buf_t *bh, struct udp_payload *bp, in_port_t dstport, size_t plen, const struct timespec *now) { + in_port_t srcport = ntohs(bh->s_in.sin_port); const struct in_addr dst = c->ip4.addr_seen; - size_t l4len = plen + sizeof(b->uh); - size_t l3len = l4len + sizeof(b->iph); - in_port_t srcport = ntohs(b->s_in.sin_port); - struct in_addr src = b->s_in.sin_addr; + struct in_addr src = bh->s_in.sin_addr; + size_t l4len = plen + sizeof(bp->uh); + size_t l3len = l4len + sizeof(bh->iph); if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) && IN4_ARE_ADDR_EQUAL(&src, &c->ip4.dns_host) && srcport == 53 && @@ -617,39 +616,41 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, src = c->ip4.gw; } - b->iph.tot_len = htons(l3len); - b->iph.daddr = dst.s_addr; - b->iph.saddr = src.s_addr; - b->iph.check = csum_ip4_header(b->iph.tot_len, IPPROTO_UDP, - src, c->ip4.addr_seen); + bh->iph.tot_len = htons(l3len); + bh->iph.daddr = c->ip4.addr_seen.s_addr;This is still the same as dst.s_addr (existing assignment), right? -- Stefano
On Tue, Apr 30, 2024 at 10:16:04PM +0200, Stefano Brivio wrote:On Tue, 30 Apr 2024 20:05:39 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:Amended as suggested.Currently the IPv4 and IPv6 paths unnecessarily use different buffers for the UDP payload. Now that we're handling the various pieces of the UDP packets with an iov, we can split the payload part of the buffers off into its own array shared between IPv4 and IPv6. As well as saving a little memory, this allows the payload buffers to be neatly page aligned. With the buffers merged, udp[46]_l2_iov_sock contain exactly the same thing as each other and can also be merged. Likewise udp[46]_iov_splice can be merged together. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 127 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 62 deletions(-) diff --git a/udp.c b/udp.c index 86d0419f..8c726056 100644 --- a/udp.c +++ b/udp.c @@ -171,14 +171,22 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8) /* Static buffers */ +static struct udp_payload {As we're defining a new struct, the usual comment format would be nice, and I would also keep it symmetric with tcp_payload_t (udp_payload_t), say: /** * struct udp_payload_t - UDP header and data for inbound messages * @uh: UDP header * @data: UDP data */+ struct udphdr uh; + char data[USHRT_MAX - sizeof(struct udphdr)]; +#ifdef __AVX2__ +} __attribute__ ((packed, aligned(32))) +#else +} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) +#endif +udp_payload_buf[UDP_MAX_FRAMES];...and this could be 'udp_payload'.Ah, yes, I think that's a leftover from some interim version. Spurious change removed. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson+ /** * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections * @s_in: Source socket address, filled in by recvmmsg() * @taph: Tap backend specific header * @eh: Prefilled ethernet header * @iph: Pre-filled IP header (except for tot_len and saddr) - * @uh: Headroom for UDP header - * @data: Storage for UDP payload */ static struct udp4_l2_buf_t { struct sockaddr_in s_in; @@ -186,9 +194,6 @@ static struct udp4_l2_buf_t { struct tap_hdr taph; struct ethhdr eh; struct iphdr iph; - struct udphdr uh; - uint8_t data[USHRT_MAX - - (sizeof(struct iphdr) + sizeof(struct udphdr))]; } __attribute__ ((packed, aligned(__alignof__(unsigned int)))) udp4_l2_buf[UDP_MAX_FRAMES]; @@ -198,8 +203,6 @@ udp4_l2_buf[UDP_MAX_FRAMES]; * @taph: Tap backend specific header * @eh: Pre-filled ethernet header * @ip6h: Pre-filled IP header (except for payload_len and addresses) - * @uh: Headroom for UDP header - * @data: Storage for UDP payload */ struct udp6_l2_buf_t { struct sockaddr_in6 s_in6; @@ -212,9 +215,6 @@ struct udp6_l2_buf_t { struct tap_hdr taph; struct ethhdr eh; struct ipv6hdr ip6h; - struct udphdr uh; - uint8_t data[USHRT_MAX - - (sizeof(struct ipv6hdr) + sizeof(struct udphdr))]; #ifdef __AVX2__ } __attribute__ ((packed, aligned(32))) #else @@ -239,8 +239,7 @@ enum udp_iov_idx { }; /* recvmmsg()/sendmmsg() data for tap */ -static struct iovec udp4_l2_iov_sock [UDP_MAX_FRAMES]; -static struct iovec udp6_l2_iov_sock [UDP_MAX_FRAMES]; +static struct iovec udp_l2_iov_sock [UDP_MAX_FRAMES]; static struct iovec udp4_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS]; static struct iovec udp6_l2_iov_tap [UDP_MAX_FRAMES][UDP_NUM_IOVS]; @@ -249,8 +248,7 @@ static struct mmsghdr udp4_l2_mh_sock [UDP_MAX_FRAMES]; static struct mmsghdr udp6_l2_mh_sock [UDP_MAX_FRAMES]; /* recvmmsg()/sendmmsg() data for "spliced" connections */ -static struct iovec udp4_iov_splice [UDP_MAX_FRAMES]; -static struct iovec udp6_iov_splice [UDP_MAX_FRAMES]; +static struct iovec udp_iov_splice [UDP_MAX_FRAMES]; static struct sockaddr_in udp4_localname = { .sin_family = AF_INET, @@ -261,8 +259,8 @@ static struct sockaddr_in6 udp6_localname = { .sin6_addr = IN6ADDR_LOOPBACK_INIT, }; -static struct mmsghdr udp4_mh_splice [UDP_MAX_FRAMES]; -static struct mmsghdr udp6_mh_splice [UDP_MAX_FRAMES]; +static struct mmsghdr udp4_mh_splice [UDP_MAX_FRAMES]; +static struct mmsghdr udp6_mh_splice [UDP_MAX_FRAMES]; /** * udp_portmap_clear() - Clear UDP port map before configuration @@ -322,10 +320,14 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) */ static void udp_iov_init_one(const struct ctx *c, size_t i) { + struct udp_payload *payload = &udp_payload_buf[i]; + struct iovec *siov = &udp_l2_iov_sock[i]; + + *siov = IOV_OF_LVALUE(payload->data); + if (c->ifi4) { struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr; struct udp4_l2_buf_t *buf = &udp4_l2_buf[i]; - struct iovec *siov = &udp4_l2_iov_sock[i]; struct iovec *tiov = udp4_l2_iov_tap[i]; *buf = (struct udp4_l2_buf_t) { @@ -333,8 +335,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) .iph = L2_BUF_IP4_INIT(IPPROTO_UDP) }; - *siov = IOV_OF_LVALUE(buf->data); - mh->msg_name = &buf->s_in; mh->msg_namelen = sizeof(buf->s_in); mh->msg_iov = siov; @@ -343,13 +343,12 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph); - tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + tiov[UDP_IOV_PAYLOAD].iov_base = payload; } if (c->ifi6) { struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr; struct udp6_l2_buf_t *buf = &udp6_l2_buf[i]; - struct iovec *siov = &udp6_l2_iov_sock[i]; struct iovec *tiov = udp6_l2_iov_tap[i]; *buf = (struct udp6_l2_buf_t) { @@ -357,8 +356,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP) }; - *siov = IOV_OF_LVALUE(buf->data); - mh->msg_name = &buf->s_in6; mh->msg_namelen = sizeof(buf->s_in6); mh->msg_iov = siov; @@ -367,7 +364,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h); - tiov[UDP_IOV_PAYLOAD].iov_base = &buf->uh; + tiov[UDP_IOV_PAYLOAD].iov_base = payload; } } @@ -581,22 +578,24 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n, /** * udp_update_hdr4() - Update headers for one IPv4 datagram * @c: Execution context - * @b: Pointer to udp4_l2_buf to update + * @bh: Pointer to udp4_l2_buf to update + * @bp: Pointer to udp_payload to update * @dstport: Destination port number * @plen: Length of UDP payload * @now: Current timestamp * * Return: size of IPv4 payload (UDP header + data) */ -static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, +static size_t udp_update_hdr4(const struct ctx *c, + struct udp4_l2_buf_t *bh, struct udp_payload *bp, in_port_t dstport, size_t plen, const struct timespec *now) { + in_port_t srcport = ntohs(bh->s_in.sin_port); const struct in_addr dst = c->ip4.addr_seen; - size_t l4len = plen + sizeof(b->uh); - size_t l3len = l4len + sizeof(b->iph); - in_port_t srcport = ntohs(b->s_in.sin_port); - struct in_addr src = b->s_in.sin_addr; + struct in_addr src = bh->s_in.sin_addr; + size_t l4len = plen + sizeof(bp->uh); + size_t l3len = l4len + sizeof(bh->iph); if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) && IN4_ARE_ADDR_EQUAL(&src, &c->ip4.dns_host) && srcport == 53 && @@ -617,39 +616,41 @@ static size_t udp_update_hdr4(const struct ctx *c, struct udp4_l2_buf_t *b, src = c->ip4.gw; } - b->iph.tot_len = htons(l3len); - b->iph.daddr = dst.s_addr; - b->iph.saddr = src.s_addr; - b->iph.check = csum_ip4_header(b->iph.tot_len, IPPROTO_UDP, - src, c->ip4.addr_seen); + bh->iph.tot_len = htons(l3len); + bh->iph.daddr = c->ip4.addr_seen.s_addr;This is still the same as dst.s_addr (existing assignment), right?
Currently each tap-bound frame buffer has room for its own ethernet header. However the ethernet header is always the same for such frames, so now that we're indirectly referencing the ethernet header via iov, we can use a single buffer for all of them. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/udp.c b/udp.c index 8c726056..64e7dcef 100644 --- a/udp.c +++ b/udp.c @@ -181,39 +181,40 @@ static struct udp_payload { #endif udp_payload_buf[UDP_MAX_FRAMES]; +/* Ethernet header for IPv4 frames */ +static struct ethhdr udp4_eth_hdr; + /** * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections * @s_in: Source socket address, filled in by recvmmsg() * @taph: Tap backend specific header - * @eh: Prefilled ethernet header * @iph: Pre-filled IP header (except for tot_len and saddr) */ static struct udp4_l2_buf_t { struct sockaddr_in s_in; struct tap_hdr taph; - struct ethhdr eh; struct iphdr iph; } __attribute__ ((packed, aligned(__alignof__(unsigned int)))) udp4_l2_buf[UDP_MAX_FRAMES]; +/* Ethernet header for IPv6 frames */ +static struct ethhdr udp6_eth_hdr; + /** * udp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections * @s_in6: Source socket address, filled in by recvmmsg() * @taph: Tap backend specific header - * @eh: Pre-filled ethernet header * @ip6h: Pre-filled IP header (except for payload_len and addresses) */ struct udp6_l2_buf_t { struct sockaddr_in6 s_in6; #ifdef __AVX2__ /* Align ip6h to 32-byte boundary. */ - uint8_t pad[64 - (sizeof(struct sockaddr_in6) + sizeof(struct ethhdr) + - sizeof(struct tap_hdr))]; + uint8_t pad[64 - (sizeof(struct sockaddr_in6) + sizeof(struct tap_hdr))]; #endif struct tap_hdr taph; - struct ethhdr eh; struct ipv6hdr ip6h; #ifdef __AVX2__ } __attribute__ ((packed, aligned(32))) @@ -302,15 +303,8 @@ static void udp_invert_portmap(struct udp_fwd_ports *fwd) */ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) { - int i; - - for (i = 0; i < UDP_MAX_FRAMES; i++) { - struct udp4_l2_buf_t *b4 = &udp4_l2_buf[i]; - struct udp6_l2_buf_t *b6 = &udp6_l2_buf[i]; - - eth_update_mac(&b4->eh, eth_d, eth_s); - eth_update_mac(&b6->eh, eth_d, eth_s); - } + eth_update_mac(&udp4_eth_hdr, eth_d, eth_s); + eth_update_mac(&udp6_eth_hdr, eth_d, eth_s); } /** @@ -324,6 +318,8 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) struct iovec *siov = &udp_l2_iov_sock[i]; *siov = IOV_OF_LVALUE(payload->data); + udp4_eth_hdr.h_proto = htons_constant(ETH_P_IP); + udp6_eth_hdr.h_proto = htons_constant(ETH_P_IPV6); if (c->ifi4) { struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr; @@ -331,7 +327,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) struct iovec *tiov = udp4_l2_iov_tap[i]; *buf = (struct udp4_l2_buf_t) { - .eh = ETH_HDR_INIT(ETH_P_IP), .iph = L2_BUF_IP4_INIT(IPPROTO_UDP) }; @@ -341,7 +336,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) mh->msg_iovlen = 1; tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); - tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); + tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr); tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph); tiov[UDP_IOV_PAYLOAD].iov_base = payload; } @@ -352,7 +347,6 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) struct iovec *tiov = udp6_l2_iov_tap[i]; *buf = (struct udp6_l2_buf_t) { - .eh = ETH_HDR_INIT(ETH_P_IPV6), .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP) }; @@ -362,7 +356,7 @@ static void udp_iov_init_one(const struct ctx *c, size_t i) mh->msg_iovlen = 1; tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); - tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(buf->eh); + tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr); tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h); tiov[UDP_IOV_PAYLOAD].iov_base = payload; } @@ -627,7 +621,7 @@ static size_t udp_update_hdr4(const struct ctx *c, bp->uh.len = htons(l4len); csum_udp4(&bp->uh, src, dst, bp->data, plen); - tap_hdr_update(&bh->taph, l3len + sizeof(bh->eh)); + tap_hdr_update(&bh->taph, l3len + sizeof(udp4_eth_hdr)); return l4len; } @@ -698,7 +692,7 @@ static size_t udp_update_hdr6(const struct ctx *c, bp->uh.len = bh->ip6h.payload_len; csum_udp6(&bp->uh, src, dst, bp->data, plen); - tap_hdr_update(&bh->taph, l4len + sizeof(bh->ip6h) + sizeof(bh->eh)); + tap_hdr_update(&bh->taph, l4len + sizeof(bh->ip6h) + sizeof(udp6_eth_hdr)); return l4len; } -- 2.44.0
Currently we have separate arrays for IPv4 and IPv6 which contain the headers for guest-bound packets, and also the originating socket address. We can combine these into a single array of "metadata" structures with space for both pre-cooked IPv4 and IPv6 headers, as well as shared space for the tap specific header and socket address (using sockaddr_inany). Because we're using IOVs to separately address the pieces of each frame, these structures don't need to be packed to keep the headers contiguous so we can more naturally arrange for the alignment we want. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 131 ++++++++++++++++++++++++---------------------------------- 1 file changed, 55 insertions(+), 76 deletions(-) diff --git a/udp.c b/udp.c index 64e7dcef..a9cc6ed2 100644 --- a/udp.c +++ b/udp.c @@ -184,44 +184,27 @@ udp_payload_buf[UDP_MAX_FRAMES]; /* Ethernet header for IPv4 frames */ static struct ethhdr udp4_eth_hdr; -/** - * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections - * @s_in: Source socket address, filled in by recvmmsg() - * @taph: Tap backend specific header - * @iph: Pre-filled IP header (except for tot_len and saddr) - */ -static struct udp4_l2_buf_t { - struct sockaddr_in s_in; - - struct tap_hdr taph; - struct iphdr iph; -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) -udp4_l2_buf[UDP_MAX_FRAMES]; - /* Ethernet header for IPv6 frames */ static struct ethhdr udp6_eth_hdr; /** - * udp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections - * @s_in6: Source socket address, filled in by recvmmsg() + * udp_meta - Pre-cooked headers and metadata for UDP packets + * @ip6h: Pre-filled IPv6 header (except for payload_len and addresses) + * @ip4h: Pre-filled IPv4 header (except for tot_len and saddr) * @taph: Tap backend specific header - * @ip6h: Pre-filled IP header (except for payload_len and addresses) + * @s_in: Source socket address, filled in by recvmmsg() */ -struct udp6_l2_buf_t { - struct sockaddr_in6 s_in6; -#ifdef __AVX2__ - /* Align ip6h to 32-byte boundary. */ - uint8_t pad[64 - (sizeof(struct sockaddr_in6) + sizeof(struct tap_hdr))]; -#endif - - struct tap_hdr taph; +static struct udp_meta { struct ipv6hdr ip6h; + struct iphdr ip4h; + struct tap_hdr taph; + + union sockaddr_inany s_in; +} #ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))) -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) +__attribute__ ((aligned(32))) #endif -udp6_l2_buf[UDP_MAX_FRAMES]; +udp_meta_buf[UDP_MAX_FRAMES]; /** * enum udp_iov_idx - Indices for the buffers making up a single UDP frame @@ -315,49 +298,46 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) static void udp_iov_init_one(const struct ctx *c, size_t i) { struct udp_payload *payload = &udp_payload_buf[i]; + struct udp_meta *meta = &udp_meta_buf[i]; struct iovec *siov = &udp_l2_iov_sock[i]; + *meta = (struct udp_meta) { + .ip4h = L2_BUF_IP4_INIT(IPPROTO_UDP), + .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP), + }; + + *siov = IOV_OF_LVALUE(payload->data); udp4_eth_hdr.h_proto = htons_constant(ETH_P_IP); udp6_eth_hdr.h_proto = htons_constant(ETH_P_IPV6); if (c->ifi4) { struct msghdr *mh = &udp4_l2_mh_sock[i].msg_hdr; - struct udp4_l2_buf_t *buf = &udp4_l2_buf[i]; struct iovec *tiov = udp4_l2_iov_tap[i]; - *buf = (struct udp4_l2_buf_t) { - .iph = L2_BUF_IP4_INIT(IPPROTO_UDP) - }; - - mh->msg_name = &buf->s_in; - mh->msg_namelen = sizeof(buf->s_in); + mh->msg_name = &meta->s_in; + mh->msg_namelen = sizeof(struct sockaddr_in); mh->msg_iov = siov; mh->msg_iovlen = 1; - tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); + tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph); tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr); - tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->iph); + tiov[UDP_IOV_IP] = IOV_OF_LVALUE(meta->ip4h); tiov[UDP_IOV_PAYLOAD].iov_base = payload; } if (c->ifi6) { struct msghdr *mh = &udp6_l2_mh_sock[i].msg_hdr; - struct udp6_l2_buf_t *buf = &udp6_l2_buf[i]; struct iovec *tiov = udp6_l2_iov_tap[i]; - *buf = (struct udp6_l2_buf_t) { - .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP) - }; - - mh->msg_name = &buf->s_in6; - mh->msg_namelen = sizeof(buf->s_in6); + mh->msg_name = &meta->s_in; + mh->msg_namelen = sizeof(struct sockaddr_in6); mh->msg_iov = siov; mh->msg_iovlen = 1; - tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &buf->taph); + tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph); tiov[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr); - tiov[UDP_IOV_IP] = IOV_OF_LVALUE(buf->ip6h); + tiov[UDP_IOV_IP] = IOV_OF_LVALUE(meta->ip6h); tiov[UDP_IOV_PAYLOAD].iov_base = payload; } } @@ -572,7 +552,7 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n, /** * udp_update_hdr4() - Update headers for one IPv4 datagram * @c: Execution context - * @bh: Pointer to udp4_l2_buf to update + * @bm: Pointer to udp_meta to update * @bp: Pointer to udp_payload to update * @dstport: Destination port number * @plen: Length of UDP payload @@ -581,15 +561,15 @@ static void udp_splice_sendfrom(const struct ctx *c, unsigned start, unsigned n, * Return: size of IPv4 payload (UDP header + data) */ static size_t udp_update_hdr4(const struct ctx *c, - struct udp4_l2_buf_t *bh, struct udp_payload *bp, + struct udp_meta *bm, struct udp_payload *bp, in_port_t dstport, size_t plen, const struct timespec *now) { - in_port_t srcport = ntohs(bh->s_in.sin_port); + in_port_t srcport = ntohs(bm->s_in.sa4.sin_port); const struct in_addr dst = c->ip4.addr_seen; - struct in_addr src = bh->s_in.sin_addr; + struct in_addr src = bm->s_in.sa4.sin_addr; size_t l4len = plen + sizeof(bp->uh); - size_t l3len = l4len + sizeof(bh->iph); + size_t l3len = l4len + sizeof(bm->ip4h); if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) && IN4_ARE_ADDR_EQUAL(&src, &c->ip4.dns_host) && srcport == 53 && @@ -610,25 +590,25 @@ static size_t udp_update_hdr4(const struct ctx *c, src = c->ip4.gw; } - bh->iph.tot_len = htons(l3len); - bh->iph.daddr = c->ip4.addr_seen.s_addr; - bh->iph.saddr = src.s_addr; - bh->iph.check = csum_ip4_header(bh->iph.tot_len, IPPROTO_UDP, - src, c->ip4.addr_seen); + bm->ip4h.tot_len = htons(l3len); + bm->ip4h.daddr = c->ip4.addr_seen.s_addr; + bm->ip4h.saddr = src.s_addr; + bm->ip4h.check = csum_ip4_header(bm->ip4h.tot_len, IPPROTO_UDP, + src, c->ip4.addr_seen); - bp->uh.source = bh->s_in.sin_port; + bp->uh.source = bm->s_in.sa4.sin_port; bp->uh.dest = htons(dstport); bp->uh.len = htons(l4len); csum_udp4(&bp->uh, src, dst, bp->data, plen); - tap_hdr_update(&bh->taph, l3len + sizeof(udp4_eth_hdr)); + tap_hdr_update(&bm->taph, l3len + sizeof(udp4_eth_hdr)); return l4len; } /** * udp_update_hdr6() - Update headers for one IPv6 datagram * @c: Execution context - * @bh: Pointer to udp6_l2_buf to update + * @bm: Pointer to udp_meta to update * @bp: Pointer to udp_payload to update * @dstport: Destination port number * @plen: Length of UDP payload @@ -637,13 +617,13 @@ static size_t udp_update_hdr4(const struct ctx *c, * Return: size of IPv6 payload (UDP header + data) */ static size_t udp_update_hdr6(const struct ctx *c, - struct udp6_l2_buf_t *bh, struct udp_payload *bp, + struct udp_meta *bm, struct udp_payload *bp, in_port_t dstport, size_t plen, const struct timespec *now) { - const struct in6_addr *src = &bh->s_in6.sin6_addr; + const struct in6_addr *src = &bm->s_in.sa6.sin6_addr; const struct in6_addr *dst = &c->ip6.addr_seen; - in_port_t srcport = ntohs(bh->s_in6.sin6_port); + in_port_t srcport = ntohs(bm->s_in.sa6.sin6_port); uint16_t l4len = plen + sizeof(bp->uh); if (IN6_IS_ADDR_LINKLOCAL(src)) { @@ -680,19 +660,19 @@ static size_t udp_update_hdr6(const struct ctx *c, } - bh->ip6h.payload_len = htons(l4len); - bh->ip6h.daddr = *dst; - bh->ip6h.saddr = *src; - bh->ip6h.version = 6; - bh->ip6h.nexthdr = IPPROTO_UDP; - bh->ip6h.hop_limit = 255; + bm->ip6h.payload_len = htons(l4len); + bm->ip6h.daddr = *dst; + bm->ip6h.saddr = *src; + bm->ip6h.version = 6; + bm->ip6h.nexthdr = IPPROTO_UDP; + bm->ip6h.hop_limit = 255; - bp->uh.source = bh->s_in6.sin6_port; + bp->uh.source = bm->s_in.sa6.sin6_port; bp->uh.dest = htons(dstport); - bp->uh.len = bh->ip6h.payload_len; + bp->uh.len = bm->ip6h.payload_len; csum_udp6(&bp->uh, src, dst, bp->data, plen); - tap_hdr_update(&bh->taph, l4len + sizeof(bh->ip6h) + sizeof(udp6_eth_hdr)); + tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) + sizeof(udp6_eth_hdr)); return l4len; } @@ -721,15 +701,14 @@ static void udp_tap_send(const struct ctx *c, for (i = start; i < start + n; i++) { struct udp_payload *bp = &udp_payload_buf[i]; + struct udp_meta *bm = &udp_meta_buf[i]; size_t l4len; if (v6) { - l4len = udp_update_hdr6(c, &udp6_l2_buf[i], bp, - dstport, + l4len = udp_update_hdr6(c, bm, bp, dstport, udp6_l2_mh_sock[i].msg_len, now); } else { - l4len = udp_update_hdr4(c, &udp4_l2_buf[i], bp, - dstport, + l4len = udp_update_hdr4(c, bm, bp, dstport, udp4_l2_mh_sock[i].msg_len, now); } tap_iov[i][UDP_IOV_PAYLOAD].iov_len = l4len; -- 2.44.0
On Tue, 30 Apr 2024 20:05:41 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:Currently we have separate arrays for IPv4 and IPv6 which contain the headers for guest-bound packets, and also the originating socket address. We can combine these into a single array of "metadata" structures with space for both pre-cooked IPv4 and IPv6 headers, as well as shared space for the tap specific header and socket address (using sockaddr_inany).This is neat, definitely, I just wonder: will we need to essentially revert this if we implement a flow-based mechanism where frames can be sent to different guests/containers? On the other hand, chances are it would help instead because we would have tables of metadata instead of those being buried into frames.Because we're using IOVs to separately address the pieces of each frame, these structures don't need to be packed to keep the headers contiguous so we can more naturally arrange for the alignment we want. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 131 ++++++++++++++++++++++++---------------------------------- 1 file changed, 55 insertions(+), 76 deletions(-) diff --git a/udp.c b/udp.c index 64e7dcef..a9cc6ed2 100644 --- a/udp.c +++ b/udp.c @@ -184,44 +184,27 @@ udp_payload_buf[UDP_MAX_FRAMES]; /* Ethernet header for IPv4 frames */ static struct ethhdr udp4_eth_hdr; -/** - * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections - * @s_in: Source socket address, filled in by recvmmsg() - * @taph: Tap backend specific header - * @iph: Pre-filled IP header (except for tot_len and saddr) - */ -static struct udp4_l2_buf_t { - struct sockaddr_in s_in; - - struct tap_hdr taph; - struct iphdr iph; -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) -udp4_l2_buf[UDP_MAX_FRAMES]; - /* Ethernet header for IPv6 frames */ static struct ethhdr udp6_eth_hdr; /** - * udp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections - * @s_in6: Source socket address, filled in by recvmmsg() + * udp_meta - Pre-cooked headers and metadata for UDP packetsPre-existing, but... kernel-doc format wants *struct* x - Description.+ * @ip6h: Pre-filled IPv6 header (except for payload_len and addresses) + * @ip4h: Pre-filled IPv4 header (except for tot_len and saddr) * @taph: Tap backend specific header - * @ip6h: Pre-filled IP header (except for payload_len and addresses) + * @s_in: Source socket address, filled in by recvmmsg() */ -struct udp6_l2_buf_t { - struct sockaddr_in6 s_in6; -#ifdef __AVX2__ - /* Align ip6h to 32-byte boundary. */ - uint8_t pad[64 - (sizeof(struct sockaddr_in6) + sizeof(struct tap_hdr))]; -#endif - - struct tap_hdr taph; +static struct udp_meta { struct ipv6hdr ip6h; + struct iphdr ip4h; + struct tap_hdr taph; + + union sockaddr_inany s_in; +} #ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))) -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) +__attribute__ ((aligned(32))) #endif -udp6_l2_buf[UDP_MAX_FRAMES]; +udp_meta_buf[UDP_MAX_FRAMES]; /** * enum udp_iov_idx - Indices for the buffers making up a single UDP frame @@ -315,49 +298,46 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) static void udp_iov_init_one(const struct ctx *c, size_t i) { struct udp_payload *payload = &udp_payload_buf[i]; + struct udp_meta *meta = &udp_meta_buf[i]; struct iovec *siov = &udp_l2_iov_sock[i]; + *meta = (struct udp_meta) { + .ip4h = L2_BUF_IP4_INIT(IPPROTO_UDP), + .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP), + }; + +One extra newline should be enough. -- Stefano
On Tue, Apr 30, 2024 at 10:16:34PM +0200, Stefano Brivio wrote:On Tue, 30 Apr 2024 20:05:41 +1000 David Gibson <david(a)gibson.dropbear.id.au> wrote:No, I don't think so. There's still a separate IPv4 and IPv6 header, and socket address for each frame. It's just that those are stored in a parallel array, rather than alongside the paylaod buffers. If we are handling multiple guests, we can probably additionally merge the IPv4 and IPv6 header buffers, since I don't think there will be anything we can pre-fill any more.Currently we have separate arrays for IPv4 and IPv6 which contain the headers for guest-bound packets, and also the originating socket address. We can combine these into a single array of "metadata" structures with space for both pre-cooked IPv4 and IPv6 headers, as well as shared space for the tap specific header and socket address (using sockaddr_inany).This is neat, definitely, I just wonder: will we need to essentially revert this if we implement a flow-based mechanism where frames can be sent to different guests/containers?On the other hand, chances are it would help instead because we would have tables of metadata instead of those being buried into frames.Adjusted. I also changed to udp_meta_t and udp_meta[] for consistency with the payload buffers.Because we're using IOVs to separately address the pieces of each frame, these structures don't need to be packed to keep the headers contiguous so we can more naturally arrange for the alignment we want. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- udp.c | 131 ++++++++++++++++++++++++---------------------------------- 1 file changed, 55 insertions(+), 76 deletions(-) diff --git a/udp.c b/udp.c index 64e7dcef..a9cc6ed2 100644 --- a/udp.c +++ b/udp.c @@ -184,44 +184,27 @@ udp_payload_buf[UDP_MAX_FRAMES]; /* Ethernet header for IPv4 frames */ static struct ethhdr udp4_eth_hdr; -/** - * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections - * @s_in: Source socket address, filled in by recvmmsg() - * @taph: Tap backend specific header - * @iph: Pre-filled IP header (except for tot_len and saddr) - */ -static struct udp4_l2_buf_t { - struct sockaddr_in s_in; - - struct tap_hdr taph; - struct iphdr iph; -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) -udp4_l2_buf[UDP_MAX_FRAMES]; - /* Ethernet header for IPv6 frames */ static struct ethhdr udp6_eth_hdr; /** - * udp6_l2_buf_t - Pre-cooked IPv6 packet buffers for tap connections - * @s_in6: Source socket address, filled in by recvmmsg() + * udp_meta - Pre-cooked headers and metadata for UDP packetsPre-existing, but... kernel-doc format wants *struct* x - Description.Fixed. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson+ * @ip6h: Pre-filled IPv6 header (except for payload_len and addresses) + * @ip4h: Pre-filled IPv4 header (except for tot_len and saddr) * @taph: Tap backend specific header - * @ip6h: Pre-filled IP header (except for payload_len and addresses) + * @s_in: Source socket address, filled in by recvmmsg() */ -struct udp6_l2_buf_t { - struct sockaddr_in6 s_in6; -#ifdef __AVX2__ - /* Align ip6h to 32-byte boundary. */ - uint8_t pad[64 - (sizeof(struct sockaddr_in6) + sizeof(struct tap_hdr))]; -#endif - - struct tap_hdr taph; +static struct udp_meta { struct ipv6hdr ip6h; + struct iphdr ip4h; + struct tap_hdr taph; + + union sockaddr_inany s_in; +} #ifdef __AVX2__ -} __attribute__ ((packed, aligned(32))) -#else -} __attribute__ ((packed, aligned(__alignof__(unsigned int)))) +__attribute__ ((aligned(32))) #endif -udp6_l2_buf[UDP_MAX_FRAMES]; +udp_meta_buf[UDP_MAX_FRAMES]; /** * enum udp_iov_idx - Indices for the buffers making up a single UDP frame @@ -315,49 +298,46 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s) static void udp_iov_init_one(const struct ctx *c, size_t i) { struct udp_payload *payload = &udp_payload_buf[i]; + struct udp_meta *meta = &udp_meta_buf[i]; struct iovec *siov = &udp_l2_iov_sock[i]; + *meta = (struct udp_meta) { + .ip4h = L2_BUF_IP4_INIT(IPPROTO_UDP), + .ip6h = L2_BUF_IP6_INIT(IPPROTO_UDP), + }; + +One extra newline should be enough.