In vhost-user mode, where `pool->buf` doesn't store packet data directly,
this patch repurposes it to hold the `struct iovec` array describing
a scattered packet. This enables pools to manage true scatter-gather
descriptors, with iovecs pointing to guest memory.
`p->pkt[idx].iov_base` is now an index into this `pool->buf` iovec array,
and `p->pkt[idx].iov_len` is the count of iovecs. `packet_add_do` uses
`iov_tail_clone` to store these iovec descriptors from an input iov_tail
into `pool->buf`. `packet_get_do` reconstructs the iov_tail for a
packet by pointing to this stored array of iovecs.
Signed-off-by: Laurent Vivier
---
packet.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 148 insertions(+), 16 deletions(-)
diff --git a/packet.c b/packet.c
index fd3b6db57c30..8dbe00af12c6 100644
--- a/packet.c
+++ b/packet.c
@@ -84,6 +84,122 @@ bool pool_full(const struct pool *p)
return p->count >= p->size;
}
+/**
+ * packet_iov_max_cnt() - Return the maximum number of iovec entries we can
+ * store in the pool buffer
+ * @p: Pointer to packet pool
+ *
+ * Return: number of whole struct iovec entries fitting in the buf_size bytes
+ * of the pool buffer (integer division, any remainder is unused)
+ */
+static size_t packet_iov_max_cnt(const struct pool *p)
+{
+ return p->buf_size / sizeof(struct iovec);
+}
+
+/**
+ * packet_iov_idx() - For a given packet index, return the iovec index and
+ * the number of iovec entries of the packet
+ * @p: Pointer to packet pool
+ * @idx: Index of packet descriptor in pool
+ * @iov_cnt: Set on return to the number of iovec entries of the packet
+ * @func: For tracing: name of calling function
+ * @line: For tracing: caller line of function call
+ *
+ * Return: index of the first iovec entry of the packet, @iov_cnt is set to
+ * its number of entries; the bounds check cannot wrap around on corruption
+ */
+static size_t packet_iov_idx(const struct pool *p, size_t idx, size_t *iov_cnt,
+ const char *func, int line)
+{
+ size_t iov_idx, max = packet_iov_max_cnt(p);
+
+ iov_idx = (size_t)p->pkt[idx].iov_base;
+ *iov_cnt = p->pkt[idx].iov_len;
+
+ ASSERT_WITH_MSG(iov_idx <= max && *iov_cnt <= max - iov_idx,
+ "Corrupt iov entry: (%zu, %zu), max: %zu, %s:%i",
+ iov_idx, *iov_cnt, max, func, line);
+
+ return iov_idx;
+}
+
+/**
+ * packet_iov_next_idx() - Give the next available iovec index
+ * @p: Pointer to packet pool
+ * @idx: Index of packet descriptor in pool
+ * @func: For tracing: name of calling function
+ * @line: For tracing: caller line of function call
+ *
+ * Return: 0 for @idx 0, else index following the iovecs of packet @idx - 1
+ */
+static size_t packet_iov_next_idx(const struct pool *p, size_t idx,
+ const char *func, int line)
+{
+ size_t iov_idx, iov_cnt;
+
+ if (idx == 0)
+ return 0; /* no earlier packet: start at the first iovec slot */
+
+ iov_idx = packet_iov_idx(p, idx - 1, &iov_cnt, func, line);
+
+ return iov_idx + iov_cnt;
+}
+
+/**
+ * packet_iov_data() - For a given packet index, provide the iovec array
+ * @p: Pointer to packet pool
+ * @idx: Index of packet descriptor in pool
+ * @data: iov_tail set to point at the packet's iovec entries in the pool
+ * buffer (offset is always set to 0)
+ * @func: For tracing: name of calling function
+ * @line: For tracing: caller line of function call
+ */
+static void packet_iov_data(const struct pool *p, size_t idx,
+ struct iov_tail *data,
+ const char *func, int line)
+{
+ struct iovec *iov = (struct iovec *)p->buf;
+ size_t iov_idx, iov_cnt;
+
+ iov_idx = packet_iov_idx(p, idx, &iov_cnt, func, line);
+
+ data->iov = &iov[iov_idx];
+ data->cnt = iov_cnt;
+ data->off = 0;
+}
+
+/**
+ * packet_iov_check_range() - Check if iovec array is valid for a pool
+ * @p: Pointer to packet pool
+ * @data: iov_tail that stores the iovec array to check
+ * @func: For tracing: name of calling function
+ * @line: For tracing: caller line of function call
+ *
+ * Return: 0 if all entries are in range, else packet_check_range() error
+ */
+static int packet_iov_check_range(const struct pool *p,
+ const struct iov_tail *data,
+ const char *func, int line)
+{
+ size_t offset, i;
+
+ offset = data->off; /* skipped bytes, applied to the first entry only */
+ for (i = 0; i < data->cnt; i++) {
+ int ret;
+
+ ret = packet_check_range(p,
+ (char *)data->iov[i].iov_base + offset,
+ data->iov[i].iov_len - offset,
+ func, line);
+ if (ret)
+ return ret;
+ offset = 0; /* following entries are checked in full */
+ }
+
+ return 0;
+}
+
/**
* packet_add_do() - Add data as packet descriptor to given pool
* @p: Existing pool
@@ -107,14 +223,32 @@ void packet_add_do(struct pool *p, struct iov_tail *data,
if (!iov_tail_prune(data))
return;
- ASSERT(data->cnt == 1); /* we don't support iovec */
+ if (packet_iov_check_range(p, data, func, line))
+ return;
+
+ if (p->memory) {
+ size_t iov_max_cnt = packet_iov_max_cnt(p);
+ struct iovec *iov = (struct iovec *)p->buf;
+ size_t iov_idx;
+ int iov_cnt;
- len = data->iov[0].iov_len - data->off;
- start = (char *)data->iov[0].iov_base + data->off;
+ iov_idx = packet_iov_next_idx(p, idx, func, line);
- if (packet_check_range(p, start, len, func, line))
- return;
+ iov_cnt = iov_tail_clone(&iov[iov_idx], iov_max_cnt - iov_idx,
+ data);
+ if (iov_cnt < 0) {
+ debug("add iov (%zu,%zu) to buf with size %zu, %s:%i",
+ iov_idx, data->cnt, iov_max_cnt, func, line);
+ return;
+ }
+ len = iov_cnt;
+ /* NOLINTNEXTLINE(performance-no-int-to-ptr) */
+ start = (char *)iov_idx;
+ } else {
+ len = data->iov[0].iov_len - data->off;
+ start = (char *)data->iov[0].iov_base + data->off;
+ }
p->pkt[idx].iov_base = (void *)start;
p->pkt[idx].iov_len = len;
@@ -136,8 +270,6 @@ bool packet_get_do(const struct pool *p, size_t idx,
struct iov_tail *data,
const char *func, int line)
{
- size_t i;
-
ASSERT_WITH_MSG(p->count <= p->size,
"Corrupted pool count: %zu, size: %zu, %s:%i",
p->count, p->size, func, line);
@@ -148,17 +280,17 @@ bool packet_get_do(const struct pool *p, size_t idx,
return false;
}
- data->cnt = 1;
- data->off = 0;
- data->iov = &p->pkt[idx];
-
- for (i = 0; i < data->cnt; i++) {
- ASSERT_WITH_MSG(!packet_check_range(p, data->iov[i].iov_base,
- data->iov[i].iov_len,
- func, line),
- "Corrupt packet pool, %s:%i", func, line);
+ if (p->memory) {
+ packet_iov_data(p, idx, data, func, line);
+ } else {
+ data->cnt = 1;
+ data->off = 0;
+ data->iov = &p->pkt[idx];
}
+ ASSERT_WITH_MSG(!packet_iov_check_range(p, data, func, line),
+ "Corrupt packet pool, %s:%i", func, line);
+
return true;
}
--
2.49.0