On Sat, Dec 06, 2025 at 02:26:57AM +0100, Stefano Brivio wrote:
> For both TCP and UDP, instead of just expecting the first provided
> buffer to be large enough to contain the headers we need (from 42
> bytes for UDP data over IPv4 to 82 bytes for TCP with options over
> IPv6), change that assumption and make sure that buffers are in any
> case at least ETH_ZLEN (60 bytes) in size.
>
> This looks reasonable because no known vhost-user hypervisor
> implementation would give us smaller buffers than that, and with
> IPv6 we would hit an assertion failure anyway if we ever had less
> than 60 bytes per buffer.
>
> At this point, all we need to do is pad the first (and only) buffer,
> should data and headers use less than that.
>
> Link: https://bugs.passt.top/show_bug.cgi?id=166
> Signed-off-by: Stefano Brivio
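
As an aside, the 42-to-82 byte range above is just header arithmetic;
here's a rough sketch using the standard kernel/libc structs (an
approximation of passt's own definitions; the 8 bytes of SYN options
assume MSS + NOP + window scale, matching struct tcp_syn_opts from the
diff below):

	#include <stdio.h>
	#include <linux/if_ether.h>	/* ETH_HLEN (14), ETH_ZLEN (60) */
	#include <netinet/ip.h>		/* struct iphdr (20 bytes) */
	#include <netinet/ip6.h>	/* struct ip6_hdr (40 bytes) */
	#include <netinet/tcp.h>	/* struct tcphdr (20 bytes) */
	#include <netinet/udp.h>	/* struct udphdr (8 bytes) */

	int main(void)
	{
		/* smallest frame: Ethernet + IPv4 + UDP, no payload */
		size_t udp4 = ETH_HLEN + sizeof(struct iphdr) +
			      sizeof(struct udphdr);	 /* 14 + 20 + 8 = 42 */
		/* largest headers: Ethernet + IPv6 + TCP + SYN options */
		size_t tcp6 = ETH_HLEN + sizeof(struct ip6_hdr) +
			      sizeof(struct tcphdr) + 8; /* 14 + 40 + 20 + 8 = 82 */

		/* prints: udp4=42 tcp6=82 ETH_ZLEN=60 */
		printf("udp4=%zu tcp6=%zu ETH_ZLEN=%d\n", udp4, tcp6, ETH_ZLEN);
		return 0;
	}

Only the low end of that range falls short of the 60-byte minimum,
which is why short frames need padding at all.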
I find the way iov_len changes through the whole path a bit confusing:
1. First, it represents the buffer length we get from vu_collect()
2. We check that it has room for a padded frame
3. We decrease it based on the _unpadded_ frame
4. We write the padding to the buffer
5. We increase it again to include the padding
The last step is safe because of the earlier check. I think it's now
adequately clear that it's safe, but it's not _super_ clear, because
there are still some in-between steps. I'd kind of prefer not to
shrink iov_len below the padded frame size in the first place; see
the sketch below.
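
To make that concrete (frame_set_len() and its arguments are
hypothetical, not code from the patch): take the frame offset and the
unpadded length, and write iov_len exactly once, padding included:

	#include <string.h>
	#include <sys/uio.h>
	#include <linux/if_ether.h>	/* ETH_ZLEN */

	/* Set iov_len exactly once, padding already included, so it never
	 * drops below the padded frame size: @off is the offset of the L2
	 * frame within the buffer (e.g. the vnet header length), @l2len
	 * its unpadded length.
	 */
	static void frame_set_len(struct iovec *iov, size_t off, size_t l2len)
	{
		size_t padded = l2len < ETH_ZLEN ? ETH_ZLEN : l2len;

		memset((char *)iov->iov_base + off + l2len, 0, padded - l2len);
		iov->iov_len = off + padded;
	}

That would fold steps 3 to 5 into a single assignment, with the check
from step 2 unchanged.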
But this does the job, and I don't really want to hold this up any
longer, so,
Reviewed-by: David Gibson
> ---
>  tcp_vu.c    | 17 ++++++++++++-----
>  udp_vu.c    |  8 ++++++--
>  vu_common.c | 14 ++++++++++++++
>  vu_common.h |  1 +
>  4 files changed, 33 insertions(+), 7 deletions(-)
>
> diff --git a/tcp_vu.c b/tcp_vu.c
> index 1c81ce3..db9db78 100644
> --- a/tcp_vu.c
> +++ b/tcp_vu.c
> @@ -91,12 +91,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  	vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);
>
>  	elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
> -			      hdrlen + sizeof(struct tcp_syn_opts), NULL);
> +			      MAX(hdrlen + sizeof(*opts), ETH_ZLEN), NULL);
>  	if (elem_cnt != 1)
>  		return -1;
>
>  	ASSERT(flags_elem[0].in_sg[0].iov_len >=
> -	       hdrlen + sizeof(struct tcp_syn_opts));
> +	       MAX(hdrlen + sizeof(*opts), ETH_ZLEN));
>
>  	vu_set_vnethdr(vdev, flags_elem[0].in_sg[0].iov_base, 1);
>
> @@ -138,6 +138,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  	tcp_fill_headers(c, conn, NULL, eh, ip4h, ip6h, th, &payload,
>  			 NULL, seq, !*c->pcap);
>
> +	vu_pad(&flags_elem[0].in_sg[0], hdrlen + optlen);
> +
>  	if (*c->pcap) {
>  		pcap_iov(&flags_elem[0].in_sg[0], 1,
>  			 sizeof(struct virtio_net_hdr_mrg_rxbuf));
> @@ -211,7 +213,8 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
>  		cnt = vu_collect(vdev, vq, &elem[elem_cnt],
>  				 VIRTQUEUE_MAX_SIZE - elem_cnt,
> -				 MIN(mss, fillsize) + hdrlen, &frame_size);
> +				 MAX(MIN(mss, fillsize) + hdrlen, ETH_ZLEN),
> +				 &frame_size);
>  		if (cnt == 0)
>  			break;
>
> @@ -254,6 +257,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
>  			len -= iov->iov_len;
>  		}
>
> +		/* adjust head count */
>  		while (*head_cnt > 0 && head[*head_cnt - 1] >= i)
>  			(*head_cnt)--;
>
> @@ -301,9 +305,9 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
>  	struct ethhdr *eh;
>
>  	/* we guess the first iovec provided by the guest can embed
> -	 * all the headers needed by L2 frame
> +	 * all the headers needed by L2 frame, including any padding
>  	 */
> -	ASSERT(iov[0].iov_len >= hdrlen);
> +	ASSERT(iov[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
>
>  	eh = vu_eth(base);
>
> @@ -456,6 +460,9 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
>  		tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
>
> +		/* Pad first/single buffer only, it's at least ETH_ZLEN long */
> +		vu_pad(iov, dlen + hdrlen);
> +
>  		if (*c->pcap) {
>  			pcap_iov(iov, buf_cnt,
>  				 sizeof(struct virtio_net_hdr_mrg_rxbuf));
> diff --git a/udp_vu.c b/udp_vu.c
> index 099677f..c30dcf9 100644
> --- a/udp_vu.c
> +++ b/udp_vu.c
> @@ -72,8 +72,8 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>  {
>  	const struct vu_dev *vdev = c->vdev;
>  	int iov_cnt, idx, iov_used;
> +	size_t off, hdrlen, l2len;
>  	struct msghdr msg = { 0 };
> -	size_t off, hdrlen;
>
>  	ASSERT(!c->no_udp);
>
> @@ -90,7 +90,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>  		return 0;
>
>  	/* reserve space for the headers */
> -	ASSERT(iov_vu[0].iov_len >= hdrlen);
> +	ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN));
>  	iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
>  	iov_vu[0].iov_len -= hdrlen;
>
> @@ -116,6 +116,10 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>  	iov_vu[idx].iov_len = off;
>  	iov_used = idx + !!off;
>
> +	/* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
> +	l2len = *dlen + hdrlen - sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +	vu_pad(&iov_vu[0], l2len);
> +
>  	vu_set_vnethdr(vdev, iov_vu[0].iov_base, iov_used);
>
>  	/* release unused buffers */
> diff --git a/vu_common.c b/vu_common.c
> index ce61fa6..c682498 100644
> --- a/vu_common.c
> +++ b/vu_common.c
> @@ -293,3 +293,17 @@ err:
>  	return -1;
>  }
> +
> +/**
> + * vu_pad() - Pad 802.3 frame to minimum length (60 bytes) if needed
> + * @iov:	Buffer in iovec array where end of 802.3 frame is stored
> + * @l2len:	Layer-2 length already filled in frame
> + */
> +void vu_pad(struct iovec *iov, size_t l2len)
> +{
> +	if (l2len >= ETH_ZLEN)
> +		return;
> +
> +	memset((char *)iov->iov_base + iov->iov_len, 0, ETH_ZLEN - l2len);
> +	iov->iov_len += ETH_ZLEN - l2len;
> +}
> diff --git a/vu_common.h b/vu_common.h
> index c0883b2..27fe7e0 100644
> --- a/vu_common.h
> +++ b/vu_common.h
> @@ -57,5 +57,6 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
>  void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
>  		const struct timespec *now);
>  int vu_send_single(const struct ctx *c, const void *buf, size_t size);
> +void vu_pad(struct iovec *iov, size_t l2len);
>
>  #endif /* VU_COMMON_H */
> --
> 2.43.0
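
For what it's worth, the padding behaviour is easy to exercise in
isolation. A minimal standalone check, with vu_pad() copied from the
patch and, unlike the real paths, the frame starting at offset zero
(no vnet header):

	#include <assert.h>
	#include <string.h>
	#include <sys/uio.h>
	#include <linux/if_ether.h>	/* ETH_ZLEN == 60 */

	/* vu_pad() as in the patch above; the buffer must have at least
	 * ETH_ZLEN - l2len bytes of room past iov_len
	 */
	static void vu_pad(struct iovec *iov, size_t l2len)
	{
		if (l2len >= ETH_ZLEN)
			return;

		memset((char *)iov->iov_base + iov->iov_len, 0,
		       ETH_ZLEN - l2len);
		iov->iov_len += ETH_ZLEN - l2len;
	}

	int main(void)
	{
		char buf[ETH_ZLEN];
		struct iovec iov = { .iov_base = buf, .iov_len = 42 };

		memset(buf, 0xff, sizeof(buf));	/* poison, so padding shows */
		/* pretend the first 42 bytes are Ethernet + IPv4 + UDP headers */
		vu_pad(&iov, 42);

		assert(iov.iov_len == ETH_ZLEN);	/* grown to 60 */
		assert(!buf[42] && !buf[ETH_ZLEN - 1]);	/* tail zeroed */
		return 0;
	}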
-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson