Nit: It's not very obvious to me what the salient difference is between "throughout the network stack" (previous patch) and "throughout the packet processing path" (this one). On Fri, Nov 21, 2025 at 05:59:00PM +0100, Laurent Vivier wrote:
For vhost-user multiqueue support, we need to maintain separate packet pools for each queue pair, as packets from different queues should be processed independently.
Previously, we had single global packet pools (pool_tap4 and pool_tap6) for IPv4 and IPv6 packets. This worked fine for single-queue scenarios (passt and pasta), but doesn't support multiple concurrent receive queues.
Convert these to arrays of pools, one per queue pair, indexed by the queue pair number. The queue pair is simply the virtqueue index divided by 2 (since each queue pair consists of one RX and one TX virtqueue).
Add a qpair parameter throughout the packet processing pipeline: - tap4_handler() and tap6_handler(): specify which queue's packets to process - tap_flush_pools(): specify which queue's pools to flush - tap_handler(): specify which queue to handle - tap_add_packet(): specify which queue's pool to add the packet to
For passt and pasta (single queue), all calls use qpair=0. For vhost-user multiqueue, the queue pair is derived from the virtqueue index (index / 2) in vu_handle_tx().
The pool initialization in tap_sock_update_pool() is updated to initialize all queue pairs' pools from the same underlying buffer.
Signed-off-by: Laurent Vivier
Reviewed-by: David Gibson
--- tap.c | 107 ++++++++++++++++++++++++++++++---------------------- tap.h | 7 ++-- vu_common.c | 6 +-- 3 files changed, 68 insertions(+), 52 deletions(-)
diff --git a/tap.c b/tap.c index a842104687b7..529acecc9851 100644 --- a/tap.c +++ b/tap.c @@ -94,9 +94,13 @@ CHECK_FRAME_LEN(L2_MAX_LEN_VU); DIV_ROUND_UP(sizeof(pkt_buf), \ ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
-/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers */ -static PACKET_POOL_NOINIT(pool_tap4, TAP_MSGS_IP4); -static PACKET_POOL_NOINIT(pool_tap6, TAP_MSGS_IP6); +/* IPv4 (plus ARP) and IPv6 message batches from tap/guest to IP handlers + * One pool per queue pair for multiqueue support + */ +static PACKET_POOL_DECL(pool_tap4, TAP_MSGS_IP4) pool_tap4_storage[VHOST_USER_MAX_VQS / 2]; +static struct pool *pool_tap4[VHOST_USER_MAX_VQS / 2]; +static PACKET_POOL_DECL(pool_tap6, TAP_MSGS_IP6) pool_tap6_storage[VHOST_USER_MAX_VQS / 2]; +static struct pool *pool_tap6[VHOST_USER_MAX_VQS / 2];
#define TAP_SEQS 128 /* Different L4 tuples in one batch */ #define FRAGMENT_MSG_RATE 10 /* # seconds between fragment warnings */ @@ -703,21 +707,22 @@ static bool tap4_is_fragment(const struct iphdr *iph, /** * tap4_handler() - IPv4 and ARP packet handler for tap file descriptor * @c: Execution context + * @qpair: Queue pair * @now: Current timestamp * * Return: count of packets consumed by handlers */ -static int tap4_handler(struct ctx *c, const struct timespec *now) +static int tap4_handler(struct ctx *c, unsigned int qpair, const struct timespec *now) { unsigned int i, j, seq_count; struct tap4_l4_t *seq;
- if (!c->ifi4 || !pool_tap4->count) - return pool_tap4->count; + if (!c->ifi4 || !pool_tap4[qpair]->count) + return pool_tap4[qpair]->count;
i = 0; resume: - for (seq_count = 0, seq = NULL; i < pool_tap4->count; i++) { + for (seq_count = 0, seq = NULL; i < pool_tap4[qpair]->count; i++) { size_t l3len, hlen, l4len; struct ethhdr eh_storage; struct iphdr iph_storage; @@ -727,7 +732,7 @@ resume: struct iov_tail data; struct iphdr *iph;
- if (!packet_get(pool_tap4, i, &data)) + if (!packet_get(pool_tap4[qpair], i, &data)) continue;
eh = IOV_PEEK_HEADER(&data, eh_storage); @@ -794,8 +799,8 @@ resume: if (iph->protocol == IPPROTO_UDP) { struct iov_tail eh_data;
- packet_get(pool_tap4, i, &eh_data); - if (dhcp(c, 0, &eh_data)) + packet_get(pool_tap4[qpair], i, &eh_data); + if (dhcp(c, qpair, &eh_data)) continue; }
@@ -825,7 +830,7 @@ resume: goto append;
if (seq_count == TAP_SEQS) - break; /* Resume after flushing if i < pool_tap4->count */ + break; /* Resume after flushing if i < pool_tap4[qpair]->count */
Nit: overlong line
for (seq = tap4_l4 + seq_count - 1; seq >= tap4_l4; seq--) { if (L4_MATCH(iph, uh, seq)) { @@ -871,30 +876,31 @@ append: } }
- if (i < pool_tap4->count) + if (i < pool_tap4[qpair]->count) goto resume;
- return pool_tap4->count; + return pool_tap4[qpair]->count; }
/** * tap6_handler() - IPv6 packet handler for tap file descriptor * @c: Execution context + * @qpair: Queue pair * @now: Current timestamp * * Return: count of packets consumed by handlers */ -static int tap6_handler(struct ctx *c, const struct timespec *now) +static int tap6_handler(struct ctx *c, unsigned int qpair, const struct timespec *now) { unsigned int i, j, seq_count = 0; struct tap6_l4_t *seq;
- if (!c->ifi6 || !pool_tap6->count) - return pool_tap6->count; + if (!c->ifi6 || !pool_tap6[qpair]->count) + return pool_tap6[qpair]->count;
i = 0; resume: - for (seq_count = 0, seq = NULL; i < pool_tap6->count; i++) { + for (seq_count = 0, seq = NULL; i < pool_tap6[qpair]->count; i++) { size_t l4len, plen, check; struct in6_addr *saddr, *daddr; struct ipv6hdr ip6h_storage; @@ -906,7 +912,7 @@ resume: struct ipv6hdr *ip6h; uint8_t proto;
- if (!packet_get(pool_tap6, i, &data)) + if (!packet_get(pool_tap6[qpair], i, &data)) return -1;
eh = IOV_REMOVE_HEADER(&data, eh_storage); @@ -1014,7 +1020,7 @@ resume: goto append;
if (seq_count == TAP_SEQS) - break; /* Resume after flushing if i < pool_tap6->count */ + break; /* Resume after flushing if i < pool_tap6[qpair]->count */
for (seq = tap6_l4 + seq_count - 1; seq >= tap6_l4; seq--) { if (L4_MATCH(ip6h, proto, uh, seq)) { @@ -1061,39 +1067,42 @@ append: } }
- if (i < pool_tap6->count) + if (i < pool_tap6[qpair]->count) goto resume;
- return pool_tap6->count; + return pool_tap6[qpair]->count; }
/** - * tap_flush_pools() - Flush both IPv4 and IPv6 packet pools + * tap_flush_pools() - Flush both IPv4 and IPv6 packet pools for a given qpair */ -void tap_flush_pools(void) +void tap_flush_pools(unsigned int qpair) { - pool_flush(pool_tap4); - pool_flush(pool_tap6); + pool_flush(pool_tap4[qpair]); + pool_flush(pool_tap6[qpair]); }
/** * tap_handler() - IPv4/IPv6 and ARP packet handler for tap file descriptor * @c: Execution context + * @qpair: Queue pair * @now: Current timestamp */ -void tap_handler(struct ctx *c, const struct timespec *now) +void tap_handler(struct ctx *c, unsigned int qpair, const struct timespec *now) { - tap4_handler(c, now); - tap6_handler(c, now); + ASSERT(qpair < VHOST_USER_MAX_VQS / 2); + tap4_handler(c, qpair, now); + tap6_handler(c, qpair, now); }
/** * tap_add_packet() - Queue/capture packet, update notion of guest MAC address * @c: Execution context + * @qpair: Queue pair * @data: Packet to add to the pool * @now: Current timestamp */ -void tap_add_packet(struct ctx *c, struct iov_tail *data, +void tap_add_packet(struct ctx *c, unsigned int qpair, struct iov_tail *data, const struct timespec *now) { struct ethhdr eh_storage; @@ -1114,21 +1123,23 @@ void tap_add_packet(struct ctx *c, struct iov_tail *data, proto_update_l2_buf(c->guest_mac); }
+ ASSERT(qpair < VHOST_USER_MAX_VQS / 2); + switch (ntohs(eh->h_proto)) { case ETH_P_ARP: case ETH_P_IP: - if (!pool_can_fit(pool_tap4, data)) { - tap4_handler(c, now); - pool_flush(pool_tap4); + if (!pool_can_fit(pool_tap4[qpair], data)) { + tap4_handler(c, qpair, now); + pool_flush(pool_tap4[qpair]); } - packet_add(pool_tap4, data); + packet_add(pool_tap4[qpair], data); break; case ETH_P_IPV6: - if (!pool_can_fit(pool_tap6, data)) { - tap6_handler(c, now); - pool_flush(pool_tap6); + if (!pool_can_fit(pool_tap6[qpair], data)) { + tap6_handler(c, qpair, now); + pool_flush(pool_tap6[qpair]); } - packet_add(pool_tap6, data); + packet_add(pool_tap6[qpair], data); break; default: break; @@ -1168,7 +1179,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now) ssize_t n; char *p;
- tap_flush_pools(); + tap_flush_pools(0);
if (partial_len) { /* We have a partial frame from an earlier pass. Move it to the @@ -1212,7 +1223,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now) n -= sizeof(uint32_t);
data = IOV_TAIL_FROM_BUF(p, l2len, 0); - tap_add_packet(c, &data, now); + tap_add_packet(c, 0, &data, now);
p += l2len; n -= l2len; @@ -1221,7 +1232,7 @@ static void tap_passt_input(struct ctx *c, const struct timespec *now) partial_len = n; partial_frame = p;
- tap_handler(c, now); + tap_handler(c, 0, now); }
/** @@ -1251,7 +1262,7 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now) { ssize_t n, len;
- tap_flush_pools(); + tap_flush_pools(0);
for (n = 0; n <= (ssize_t)(sizeof(pkt_buf) - L2_MAX_LEN_PASTA); @@ -1280,10 +1291,10 @@ static void tap_pasta_input(struct ctx *c, const struct timespec *now) continue;
data = IOV_TAIL_FROM_BUF(pkt_buf + n, len, 0); - tap_add_packet(c, &data, now); + tap_add_packet(c, 0, &data, now); }
- tap_handler(c, now); + tap_handler(c, 0, now); }
/** @@ -1487,10 +1498,14 @@ static void tap_sock_tun_init(struct ctx *c) */ static void tap_sock_update_pool(void *base, size_t size) { - int i; + unsigned int i;
- pool_tap4_storage = PACKET_INIT(pool_tap4, TAP_MSGS_IP4, base, size); - pool_tap6_storage = PACKET_INIT(pool_tap6, TAP_MSGS_IP6, base, size); + for (i = 0; i < VHOST_USER_MAX_VQS / 2; i++) { + pool_tap4_storage[i] = PACKET_INIT(pool_tap4, TAP_MSGS_IP4, base, size); + pool_tap4[i] = (struct pool *)&pool_tap4_storage[i]; + pool_tap6_storage[i] = PACKET_INIT(pool_tap6, TAP_MSGS_IP6, base, size); + pool_tap6[i] = (struct pool *)&pool_tap6_storage[i]; + }
for (i = 0; i < TAP_SEQS; i++) { tap4_l4[i].p = PACKET_INIT(pool_l4, UIO_MAXIOV, base, size); diff --git a/tap.h b/tap.h index 92d3e5446991..f10c2a212a51 100644 --- a/tap.h +++ b/tap.h @@ -118,8 +118,9 @@ void tap_handler_passt(struct ctx *c, uint32_t events, int tap_sock_unix_open(char *sock_path); void tap_sock_reset(struct ctx *c); void tap_backend_init(struct ctx *c); -void tap_flush_pools(void); -void tap_handler(struct ctx *c, const struct timespec *now); -void tap_add_packet(struct ctx *c, struct iov_tail *data, +void tap_flush_pools(unsigned int qpair); +void tap_handler(struct ctx *c, unsigned int qpair, + const struct timespec *now); +void tap_add_packet(struct ctx *c, unsigned int qpair, struct iov_tail *data, const struct timespec *now); #endif /* TAP_H */ diff --git a/vu_common.c b/vu_common.c index 040ad067ffbf..63ead85a4674 100644 --- a/vu_common.c +++ b/vu_common.c @@ -170,7 +170,7 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
ASSERT(VHOST_USER_IS_QUEUE_TX(index));
- tap_flush_pools(); + tap_flush_pools(index / 2);
count = 0; out_sg_count = 0; @@ -196,11 +196,11 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
data = IOV_TAIL(elem[count].out_sg, elem[count].out_num, 0); if (IOV_DROP_HEADER(&data, struct virtio_net_hdr_mrg_rxbuf)) - tap_add_packet(vdev->context, &data, now); + tap_add_packet(vdev->context, index / 2, &data, now);
count++; } - tap_handler(vdev->context, now); + tap_handler(vdev->context, index / 2, now);
if (count) { int i; -- 2.51.0
-- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson