On Mon, Mar 09, 2026 at 10:47:35AM +0100, Laurent Vivier wrote:
Replace direct iovec pointer arithmetic in UDP vhost-user handling with iov_tail operations.
udp_vu_sock_recv() now takes an iov/cnt pair instead of using the file-scoped iov_vu array, and returns the data length rather than the iov count. Internally it uses iov_drop_header() to skip past L2/L3/L4 headers before receiving, and iov_tail_clone() to build the recvmsg() iovec, removing the manual pointer offset and restore pattern.
udp_vu_prepare() and udp_vu_csum() take a const struct iov_tail * instead of referencing iov_vu directly, making data flow explicit.
udp_vu_csum() uses iov_drop_header() and IOV_REMOVE_HEADER() to locate the UDP header and payload, replacing manual offset calculations via vu_payloadv4()/vu_payloadv6().
Signed-off-by: Laurent Vivier
Minor notes only.
--- udp_vu.c | 111 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 54 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c index 439f2cb399b7..a39254776099 100644 --- a/udp_vu.c +++ b/udp_vu.c @@ -59,21 +59,25 @@ static size_t udp_vu_hdrlen(bool v6) /** * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers * @c: Execution context + * @iov: IO vector for the frame (modified on output) + * @cnt: Number of IO vector entries (in/out)
Nit: "modified on output" and "in/out" are different ways of saying the same thing, yes?
* @vq: virtqueue to use to receive data * @s: Socket to receive from * @v6: Set for IPv6 connections - * @dlen: Size of received data (output) * - * Return: number of iov entries used to store the datagram, 0 if the datagram + * Return: size of received data, 0 if the datagram * was discarded because the virtqueue is not ready, -1 on error */ -static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s, - bool v6, ssize_t *dlen) +static ssize_t udp_vu_sock_recv(const struct ctx *c, struct iovec *iov, + size_t *cnt, struct vu_virtq *vq, int s, + bool v6) { const struct vu_dev *vdev = c->vdev; struct msghdr msg = { 0 }; - int iov_cnt, iov_used; + struct iov_tail payload; size_t hdrlen; + ssize_t dlen; + int iov_cnt;
ASSERT(!c->no_udp);
@@ -83,78 +87,74 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s, if (recvmsg(s, &msg, MSG_DONTWAIT) < 0) debug_perror("Failed to discard datagram");
+ *cnt = 0; return 0; }
/* compute L2 header length */ hdrlen = udp_vu_hdrlen(v6);
- vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem)); + vu_init_elem(elem, iov, *cnt);
iov_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem), IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL); if (iov_cnt == 0) return -1;
- /* reserve space for the headers */ - ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN)); + payload = IOV_TAIL(iov, iov_cnt, hdrlen);
- iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen; - iov_vu[0].iov_len -= hdrlen; + struct iovec msg_iov[payload.cnt];
We generally avoid declaring variables in the middle of a block, after statements, although the language has allowed it since C99 (so C11 does too).
+ msg.msg_iov = msg_iov; + msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload); /* read data from the socket */ - msg.msg_iov = iov_vu; - msg.msg_iovlen = iov_cnt; - - *dlen = recvmsg(s, &msg, 0); - if (*dlen < 0) { + dlen = recvmsg(s, &msg, 0); + if (dlen < 0) { vu_queue_rewind(vq, iov_cnt); return -1; }
- /* restore the pointer to the headers address */ - iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen; - iov_vu[0].iov_len += hdrlen; - /* Pad short frames to ETH_ZLEN */ - if (ETH_ZLEN + VNET_HLEN > *dlen + hdrlen) { - iov_memset(iov_vu, iov_cnt, *dlen + hdrlen, 0, - ETH_ZLEN + VNET_HLEN - (*dlen + hdrlen)); + if (ETH_ZLEN + VNET_HLEN > dlen + hdrlen) { + iov_memset(iov, iov_cnt, dlen + hdrlen, 0, + ETH_ZLEN + VNET_HLEN - (dlen + hdrlen)); } - iov_used = iov_truncate(iov_vu, iov_cnt, *dlen + hdrlen); + *cnt = iov_truncate(iov, iov_cnt, dlen + hdrlen); - vu_set_vnethdr(iov_vu[0].iov_base, iov_used); + vu_set_vnethdr(iov[0].iov_base, *cnt);
/* release unused buffers */ - vu_queue_rewind(vq, iov_cnt - iov_used); + vu_queue_rewind(vq, iov_cnt - *cnt);
- return iov_used; + return dlen; }
/** * udp_vu_prepare() - Prepare the packet header * @c: Execution context + * @data: IO vector tail for the frame * @toside: Address information for one side of the flow * @dlen: Packet data length * * Return: Layer-4 length */ -static size_t udp_vu_prepare(const struct ctx *c, +static size_t udp_vu_prepare(const struct ctx *c, const struct iov_tail *data, const struct flowside *toside, ssize_t dlen) { + const struct iovec *iov = data->iov; struct ethhdr *eh; size_t l4len;
/* ethernet header */ - eh = vu_eth(iov_vu[0].iov_base); + eh = vu_eth(iov[0].iov_base);
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest)); memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
/* initialize header */ if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) { - struct iphdr *iph = vu_ip(iov_vu[0].iov_base); - struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base); + struct iphdr *iph = vu_ip(iov[0].iov_base); + struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
eh->h_proto = htons(ETH_P_IP);
@@ -162,8 +162,8 @@ static size_t udp_vu_prepare(const struct ctx *c,
l4len = udp_update_hdr4(iph, bp, toside, dlen, true); } else { - struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base); - struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base); + struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base); + struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
eh->h_proto = htons(ETH_P_IPV6);
@@ -178,25 +178,25 @@ static size_t udp_vu_prepare(const struct ctx *c, /** * udp_vu_csum() - Calculate and set checksum for a UDP packet * @toside: Address information for one side of the flow - * @iov_used: Number of used iov_vu items + * @data: IO vector tail for the frame
Does @data start at the vhost-user (virtio-net) header, or after it? The comment should say which.
*/ -static void udp_vu_csum(const struct flowside *toside, int iov_used) +static void udp_vu_csum(const struct flowside *toside, + const struct iov_tail *data) { const struct in_addr *src4 = inany_v4(&toside->oaddr); const struct in_addr *dst4 = inany_v4(&toside->eaddr); - char *base = iov_vu[0].iov_base; - struct udp_payload_t *bp; - struct iov_tail data; + struct iov_tail payload = *data; + struct udphdr *uh, uh_storage; + bool ipv4 = src4 && dst4;
- if (src4 && dst4) { - bp = vu_payloadv4(base); - data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base); - csum_udp4(&bp->uh, *src4, *dst4, &data); - } else { - bp = vu_payloadv6(base); - data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base); - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data); - } + iov_drop_header(&payload, + udp_vu_hdrlen(!ipv4) - sizeof(struct udphdr));
This construction is a bit awkward. Would it be better to IOV_DROP_HEADER() the Ethernet header, then the IP header?
+ uh = IOV_REMOVE_HEADER(&payload, uh_storage); + + if (ipv4) + csum_udp4(uh, *src4, *dst4, &payload); + else + csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, &payload); }
/** @@ -212,23 +212,26 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)); struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; + struct iov_tail data; int i;
for (i = 0; i < n; i++) { + size_t iov_cnt; ssize_t dlen; - int iov_used;
- iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen); - if (iov_used < 0) + iov_cnt = VIRTQUEUE_MAX_SIZE; + dlen = udp_vu_sock_recv(c, iov_vu, &iov_cnt, vq, s, v6); + if (dlen < 0) break;
- if (iov_used > 0) { - udp_vu_prepare(c, toside, dlen); + if (iov_cnt > 0) { + data = IOV_TAIL(iov_vu, iov_cnt, 0); + udp_vu_prepare(c, &data, toside, dlen); if (*c->pcap) { - udp_vu_csum(toside, iov_used); - pcap_iov(iov_vu, iov_used, VNET_HLEN); + udp_vu_csum(toside, &data); + pcap_iov(data.iov, data.cnt, VNET_HLEN); } - vu_flush(vdev, vq, elem, iov_used); + vu_flush(vdev, vq, elem, data.cnt); } } } -- 2.53.0
-- 
David Gibson (he or they)       | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you, not the other way
                                | around.
http://www.ozlabs.org/~dgibson