[PATCH v3 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element
Some virtio-net drivers (notably iPXE) provide descriptors where the
vnet header and the frame payload are in separate buffers, resulting in
two iovec entries per virtqueue element. Currently, the RX (host to
guest) path assumes a single iovec per element, which triggers:

  ASSERTION FAILED in virtqueue_map_desc (virtio.c:403): num_sg < max_num_sg

This series reworks the UDP vhost-user receive path to support multiple
iovec entries per element, fixing the iPXE crash. It only addresses the
UDP path; TCP vhost-user will be updated to use multi-iov elements in a
subsequent series.

v3:
  - include the series "Decouple iovec management from virtqueue elements"
  - because of this series, drop:
      "vu_common: Accept explicit iovec counts in vu_set_element()"
      "vu_common: Accept explicit iovec count per element in vu_init_elem()"
      "vu_common: Prepare to use multibuffer with guest RX"
      "vhost-user,udp: Use 2 iovec entries per element"
  - drop "vu_common: Pass iov_tail to vu_set_vnethdr()", as the spec
    ensures a buffer is big enough to contain the vnet header
  - introduce "with_header()" and merge "udp: Pass iov_tail to
    udp_update_hdr4()/udp_update_hdr6()" and "udp_vu: Use iov_tail in
    udp_vu_prepare()" to use it

v2:
  - add iov_truncate(), iov_memset()
  - remove iov_tail_truncate() and iov_tail_zero_end()
  - manage the 802.3 minimum frame size

Laurent Vivier (8):
  virtio: Pass iovec arrays as separate parameters to vu_queue_pop()
  vu_handle_tx: Pass actual remaining out_sg capacity to vu_queue_pop()
  vu_common: Move iovec management into vu_collect()
  vhost-user: Centralise Ethernet frame padding in vu_collect(), vu_pad()
    and vu_flush()
  udp_vu: Use iov_tail to manage virtqueue buffers
  udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller
  iov: Add IOV_PUT_HEADER() and with_header() to write header data back
    to iov_tail
  udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6()

 iov.c          |  47 +++++++++++
 iov.h          |  27 ++-
 tcp_vu.c       |  46 +++++------
 udp.c          | 129 ++++++++++++++++++++--------------
 udp_internal.h |   6 +-
 udp_vu.c       | 207 +++++++++++++++++++++++++------------------------
 virtio.c       |  29 +++++--
 virtio.h       |   4 +-
 vu_common.c    | 149 ++++++++++++++++++++---------------
 vu_common.h    |  24 +-----
 10 files changed, 385 insertions(+), 283 deletions(-)

-- 
2.53.0
Currently vu_queue_pop() and vu_queue_map_desc() read the iovec arrays
(in_sg/out_sg) and their sizes (in_num/out_num) from the vu_virtq_element
struct. This couples the iovec storage to the element, requiring callers
like vu_handle_tx() to pre-initialize the element fields before calling
vu_queue_pop().
Pass the iovec arrays and their maximum sizes as separate parameters
instead. vu_queue_map_desc() now writes the actual descriptor count
and iovec pointers back into the element after mapping, rather than
using the element as both input and output.
This decouples the iovec storage from the element, which is a
prerequisite for multi-buffer support where a single frame can span
multiple virtqueue elements sharing a common iovec pool.
No functional change.
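The reshaped contract can be pictured with a minimal sketch. The struct
layout, names, and the simulated single-entry mapping below are
illustrative assumptions, not the actual virtio.h/virtio.c definitions:

```c
#include <assert.h>
#include <stddef.h>
#include <sys/uio.h>

/* Hypothetical, simplified shapes; the real virtio.h structs differ */
struct vu_virtq_element {
	unsigned int out_num, in_num;
	struct iovec *in_sg, *out_sg;
};

static char guest_buf[64];	/* stand-in for a mapped guest buffer */

/* Sketch of the new contract: the iovec arrays and their capacities come
 * in as parameters, and the pointers/counts discovered during mapping are
 * written back into the element, which no longer owns the storage. */
static int map_desc_sketch(struct vu_virtq_element *elem,
			   struct iovec *in_sg, unsigned int max_in_num,
			   struct iovec *out_sg, unsigned int max_out_num)
{
	unsigned int in_num = 0, out_num = 0;

	/* ...walking the descriptor chain would fill the arrays here,
	 * never exceeding max_in_num/max_out_num; one entry simulated... */
	if (in_num < max_in_num) {
		in_sg[in_num].iov_base = guest_buf;
		in_sg[in_num].iov_len = sizeof(guest_buf);
		in_num++;
	}
	(void)max_out_num;

	/* Output only: element fields need no pre-initialisation */
	elem->in_sg = in_sg;
	elem->in_num = in_num;
	elem->out_sg = out_sg;
	elem->out_num = out_num;
	return 0;
}
```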
Signed-off-by: Laurent Vivier
Previously, callers had to pre-initialize virtqueue elements with iovec
entries using vu_set_element() or vu_init_elem() before calling
vu_collect(). This meant each element owned a fixed, pre-assigned iovec
slot.
Move the iovec array into vu_collect() as explicit parameters (in_sg,
max_in_sg, and in_num), letting it pass the remaining iovec capacity
directly to vu_queue_pop(). A running current_iov counter tracks
consumed entries across elements, so multiple elements share a single
iovec pool. The optional in_num output parameter reports how many iovec
entries were consumed, allowing callers to track usage across multiple
vu_collect() calls.
This removes vu_set_element() and vu_init_elem() which are no longer
needed, and is a prerequisite for multi-buffer support where a single
virtqueue element can use more than one iovec entry. For now, callers
assert the current single-iovec-per-element invariant until they are
updated to handle multiple iovecs.
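The pooling idea can be sketched as follows. The pool size, the fake pop
helper, and the function shapes are assumptions for illustration; the
real vu_collect() in vu_common.c takes a device/queue and collects
buffers until a target size is reached:

```c
#include <assert.h>
#include <stddef.h>
#include <sys/uio.h>

#define POOL_SIZE 8	/* illustrative shared iovec pool */

static struct iovec iov_pool[POOL_SIZE];

/* Stand-in for vu_queue_pop(): an element maps n entries starting at the
 * slot the caller hands over, capped by the remaining capacity. */
static unsigned int fake_pop(struct iovec *sg, unsigned int max_sg,
			     unsigned int n)
{
	unsigned int i;

	if (n > max_sg)
		n = max_sg;
	for (i = 0; i < n; i++)
		sg[i].iov_len = 64;	/* pretend-mapped buffer */
	return i;
}

/* Elements share one pool: current_iov tracks consumed entries, and each
 * pop is offered only what remains. The optional in_num output reports
 * total iovec usage to the caller, as described above. */
static unsigned int collect_sketch(unsigned int elem_cnt,
				   unsigned int iov_per_elem,
				   unsigned int *in_num)
{
	unsigned int current_iov = 0, elems = 0;

	while (elems < elem_cnt && current_iov < POOL_SIZE) {
		current_iov += fake_pop(&iov_pool[current_iov],
					POOL_SIZE - current_iov,
					iov_per_elem);
		elems++;
	}
	if (in_num)
		*in_num = current_iov;
	return elems;
}
```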
Signed-off-by: Laurent Vivier
In vu_handle_tx(), pass the actual remaining iovec capacity
(ARRAY_SIZE(out_sg) - out_sg_count) to vu_queue_pop() rather than a
fixed VU_MAX_TX_BUFFER_NB.
This enables dynamic allocation of iovec entries to each element rather
than reserving a fixed number of slots per descriptor.
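The shape of that loop might look like this sketch (array size and the
pop stand-in are illustrative; the real code pops from the virtqueue and
maps guest memory):

```c
#include <assert.h>
#include <stddef.h>
#include <sys/uio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static struct iovec out_sg[8];	/* shared TX iovec array, size illustrative */

/* Stand-in for vu_queue_pop(): returns the entries a chain needs, or 0
 * if it does not fit in the capacity the caller offers. */
static size_t fake_pop(struct iovec *sg, size_t max_sg, size_t want)
{
	(void)sg;
	return want <= max_sg ? want : 0;
}

/* Each pop is offered the remaining capacity,
 * ARRAY_SIZE(out_sg) - out_sg_count, instead of a fixed per-element
 * quota, so a chain with more iovec entries simply consumes more slots. */
static size_t tx_loop_sketch(const size_t *entries_per_elem, size_t n)
{
	size_t out_sg_count = 0, i;

	for (i = 0; i < n; i++) {
		size_t got = fake_pop(&out_sg[out_sg_count],
				      ARRAY_SIZE(out_sg) - out_sg_count,
				      entries_per_elem[i]);
		if (!got)
			break;
		out_sg_count += got;
	}
	return out_sg_count;
}
```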
Signed-off-by: Laurent Vivier
The per-protocol padding done by vu_pad() in tcp_vu.c and udp_vu.c was
only correct for single-buffer frames, since it assumed the padding area
always fell within the first iov. It also relied on each caller computing
the right MAX(..., ETH_ZLEN + VNET_HLEN) size for vu_collect() and
calling vu_pad() at the right point.
Centralise padding logic into three shared vhost-user helpers instead:
- vu_collect() now ensures at least ETH_ZLEN + VNET_HLEN bytes of buffer
space are collected, so there is always room for a minimum-sized frame.
- vu_pad() replaces the old single-iov helper with a new implementation
that takes a full iovec array plus a 'skipped' byte count. It uses a
new iov_memset() helper in iov.c to zero-fill the padding area across
iovec boundaries, then calls iov_truncate() to set the logical frame
size.
- vu_flush() computes the actual frame length (accounting for
VIRTIO_NET_F_MRG_RXBUF multi-buffer frames) and passes the padded
length to vu_queue_fill().
Callers in tcp_vu.c, udp_vu.c and vu_send_single() now use the new
vu_pad() in place of the old pad-then-truncate sequences and the
MAX(..., ETH_ZLEN + VNET_HLEN) size calculations passed to vu_collect().
Centralising padding here will also ease the move to multi-iovec per
element support, since there will be a single place to update.
In vu_send_single(), fix padding, truncation and data copy to use the
requested frame size rather than the total available buffer space from
vu_collect(), which could be larger. Also add matching padding, truncation
and explicit size to vu_collect() for the DUP_ACK path in
tcp_vu_send_flag().
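The key new primitive is zero-filling a padding area that may straddle
iovec boundaries, e.g. when a short frame ends in one guest buffer and
the ETH_ZLEN minimum extends into the next. A sketch of that idea
follows; the real iov_memset() in iov.c may use a different signature:

```c
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

/* Fill 'bytes' bytes with 'fillc', starting 'offset' bytes into the
 * logical stream described by the iovec array, crossing entry
 * boundaries as needed. Returns the number of bytes filled. */
static size_t iov_memset_sketch(const struct iovec *iov, size_t iov_cnt,
				size_t offset, int fillc, size_t bytes)
{
	size_t done = 0, i;

	for (i = 0; i < iov_cnt && done < bytes; i++) {
		size_t len = iov[i].iov_len, chunk;

		if (offset >= len) {	/* entirely before the fill area */
			offset -= len;
			continue;
		}
		chunk = len - offset;
		if (chunk > bytes - done)
			chunk = bytes - done;
		memset((char *)iov[i].iov_base + offset, fillc, chunk);
		done += chunk;
		offset = 0;	/* subsequent entries start at the fill area */
	}
	return done;
}
```

After the zero-fill, the series then truncates the logical frame to the
padded length (iov_truncate() in the real code) before handing it to
vu_flush().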
Signed-off-by: Laurent Vivier
Replace direct iovec pointer arithmetic in UDP vhost-user handling with
iov_tail operations.
udp_vu_sock_recv() now takes an iov/cnt pair instead of using the
file-scoped iov_vu array, and returns the data length rather than the
iov count. Internally it uses IOV_TAIL() to create a view past the
L2/L3/L4 headers, and iov_tail_clone() to build the recvmsg() iovec,
removing the manual pointer offset and restore pattern.
udp_vu_prepare() and udp_vu_csum() take a const struct iov_tail *
instead of referencing iov_vu directly, making data flow explicit.
udp_vu_csum() uses iov_drop_header() and IOV_REMOVE_HEADER() to locate
the UDP header and payload, replacing manual offset calculations via
vu_payloadv4()/vu_payloadv6().
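The view-plus-clone pattern can be sketched as below. The struct mirrors
the iov_tail idea (an iovec array viewed from an offset); names and
layout here are assumptions, not the iov.h definitions:

```c
#include <assert.h>
#include <stddef.h>
#include <sys/uio.h>

/* Hypothetical mirror of iov_tail: a view over an iovec array that
 * logically starts 'off' bytes in (e.g. past L2/L3/L4 headers). */
struct iov_tail_sketch {
	const struct iovec *iov;
	size_t cnt;
	size_t off;
};

/* Materialise the view into a plain iovec array usable by recvmsg() or
 * readv(), without modifying the original array, replacing the old
 * offset-then-restore pointer juggling. Returns the entry count. */
static size_t tail_clone_sketch(const struct iov_tail_sketch *tail,
				struct iovec *out, size_t max_out)
{
	size_t skip = tail->off, n = 0, i;

	for (i = 0; i < tail->cnt && n < max_out; i++) {
		size_t len = tail->iov[i].iov_len;

		if (skip >= len) {	/* entry entirely before the view */
			skip -= len;
			continue;
		}
		out[n].iov_base = (char *)tail->iov[i].iov_base + skip;
		out[n].iov_len = len - skip;
		skip = 0;
		n++;
	}
	return n;
}
```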
Signed-off-by: Laurent Vivier
Add iov_put_header_() and its wrapper macro IOV_PUT_HEADER() as a
counterpart to IOV_PEEK_HEADER(). This writes header data back to an
iov_tail after modification. If the header pointer matches the
original iov buffer location, the data was already modified in place
and no copy is needed. Otherwise, it copies the data back using
iov_from_buf().
Add with_header(), a for-loop macro that combines IOV_PEEK_HEADER()
and IOV_PUT_HEADER() to allow modifying a header in place within a
block scope.
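The peek/modify/put round trip might be sketched like this. The helper
names, the plain iov/cnt parameters, and the scratch-buffer convention
are illustrative; the real IOV_PEEK_HEADER()/IOV_PUT_HEADER() macros in
iov.h operate on an iov_tail and derive sizes from the header type:

```c
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

static void *peek_hdr(const struct iovec *iov, size_t cnt,
		      void *scratch, size_t len)
{
	size_t done = 0, i;

	if (cnt && iov[0].iov_len >= len)
		return iov[0].iov_base;		/* contiguous: no copy */

	/* header spans iov entries: gather it into the scratch buffer */
	for (i = 0; i < cnt && done < len; i++) {
		size_t chunk = iov[i].iov_len;

		if (chunk > len - done)
			chunk = len - done;
		memcpy((char *)scratch + done, iov[i].iov_base, chunk);
		done += chunk;
	}
	return done == len ? scratch : NULL;	/* NULL: not enough data */
}

static void put_hdr(const struct iovec *iov, size_t cnt,
		    const void *hdr, size_t len)
{
	size_t done = 0, i;

	if (cnt && hdr == iov[0].iov_base)
		return;				/* was modified in place */

	/* scatter the modified copy back over the iov entries */
	for (i = 0; i < cnt && done < len; i++) {
		size_t chunk = iov[i].iov_len;

		if (chunk > len - done)
			chunk = len - done;
		memcpy(iov[i].iov_base, (const char *)hdr + done, chunk);
		done += chunk;
	}
}

/* for-loop shape of with_header(): peek on entry, run the block once,
 * then put the header back and terminate */
#define with_header_sketch(p, iov, cnt, scratch)			\
	for ((p) = peek_hdr((iov), (cnt), (scratch), sizeof(scratch));	\
	     (p);							\
	     put_hdr((iov), (cnt), (p), sizeof(scratch)), (p) = NULL)
```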
Signed-off-by: Laurent Vivier
udp_vu_sock_recv() currently mixes two concerns: receiving data from the
socket and managing virtqueue buffers (collecting, rewinding, releasing).
This makes the function harder to reason about and couples socket I/O
with virtqueue state.
Move all virtqueue operations (vu_collect(), vu_init_elem(),
vu_queue_rewind(), vu_set_vnethdr(), and the queue-readiness check) into
udp_vu_sock_to_tap(), which is the only caller. This turns
udp_vu_sock_recv() into a pure socket receive function that simply reads
into the provided iov array and adjusts its length.
Signed-off-by: Laurent Vivier
Change udp_update_hdr4() and udp_update_hdr6() to take an iov_tail
covering the full L3 frame (IP header + UDP header + data), instead of
separate IP header, udp_payload_t, and data-length parameters. The
functions now use with_header() and IOV_DROP_HEADER() to access the IP
and UDP headers directly from the iov_tail, and derive sizes via
iov_tail_size() rather than an explicit length argument.
This decouples the header update functions from the udp_payload_t memory
layout, which assumes all headers and data sit in a single contiguous
buffer. The vhost-user path uses virtqueue-provided scatter-gather
buffers where this assumption does not hold; passing an iov_tail lets
both the tap path and the vhost-user path share the same functions
without layout-specific helpers.
On the vhost-user side, udp_vu_prepare() likewise switches to
with_header() for the Ethernet header, and its caller now drops the
vnet header before calling udp_vu_prepare() instead of having the
function deal with it internally.
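The size-derivation change can be sketched as follows. The struct and
names are illustrative stand-ins for iov_tail, iov_tail_size() and
IOV_DROP_HEADER() from the series, not their actual definitions:

```c
#include <assert.h>
#include <stddef.h>
#include <sys/uio.h>

/* Illustrative mirror of iov_tail: a view starting 'off' bytes into an
 * iovec array. */
struct tail_sketch {
	const struct iovec *iov;
	size_t cnt;
	size_t off;
};

/* Bytes remaining in the view, the iov_tail_size() idea */
static size_t tail_size_sketch(const struct tail_sketch *t)
{
	size_t total = 0, i;

	for (i = 0; i < t->cnt; i++)
		total += t->iov[i].iov_len;
	return total - t->off;
}

/* With a view covering IP header + UDP header + data, dropping the IP
 * header leaves exactly the UDP datagram, so no explicit data-length
 * parameter is needed to fill the UDP length field. */
static size_t udp_len_sketch(struct tail_sketch *t, size_t ip_hdr_len)
{
	t->off += ip_hdr_len;		/* IOV_DROP_HEADER() equivalent */
	return tail_size_sketch(t);
}
```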
Signed-off-by: Laurent Vivier