During testing it is sometimes useful to force traffic which would
normally be forwarded by socket splicing through the tap interface.
In this commit, we add a command switch enabling such functionality
for inbound local traffic.
For outbound local traffic this is much trickier, if even possible,
so leave that for a later commit.
Suggested-by: David Gibson <david(a)gibson.dropbear.id.au>
Signed-off-by: Jon Maloy <jmaloy(a)redhat.com>
---
v2: Some minor changes based on feedback from PASST team
v3: More changes based on feedback from D. Gibson and S. Brivio
-Moved new option to pasta-only section
-Added description to man-page
v4: -Changed test on (mode == PASST) to (mode != PASTA) as
suggested by Stefano Brivio.
v5: -Updated text in man pages as suggested by Stefano Brivio.
---
conf.c | 7 ++++++-
fwd.c | 2 +-
passt.1 | 5 +++++
passt.h | 2 ++
4 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/conf.c b/conf.c
index eaa7d99..97d8beb 100644
--- a/conf.c
+++ b/conf.c
@@ -977,7 +977,8 @@ pasta_opts:
" Don't copy all routes to namespace\n"
" --no-copy-addrs DEPRECATED:\n"
" Don't copy all addresses to namespace\n"
- " --ns-mac-addr ADDR Set MAC address on tap interface\n");
+ " --ns-mac-addr ADDR Set MAC address on tap interface\n"
+ " --no-splice Disable inbound socket splicing\n");
exit(status);
}
@@ -1319,6 +1320,7 @@ void conf(struct ctx *c, int argc, char **argv)
{"no-dhcpv6", no_argument, &c->no_dhcpv6, 1 },
{"no-ndp", no_argument, &c->no_ndp, 1 },
{"no-ra", no_argument, &c->no_ra, 1 },
+ {"no-splice", no_argument, &c->no_splice, 1 },
{"freebind", no_argument, &c->freebind, 1 },
{"no-map-gw", no_argument, &no_map_gw, 1 },
{"ipv4-only", no_argument, NULL, '4' },
@@ -1756,6 +1758,9 @@ void conf(struct ctx *c, int argc, char **argv)
}
} while (name != -1);
+ if (c->mode != MODE_PASTA)
+ c->no_splice = 1;
+
if (c->mode == MODE_PASTA && !c->pasta_conf_ns) {
if (copy_routes_opt)
die("--no-copy-routes needs --config-net");
diff --git a/fwd.c b/fwd.c
index 0b7f8b1..2829cd2 100644
--- a/fwd.c
+++ b/fwd.c
@@ -443,7 +443,7 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
else if (proto == IPPROTO_UDP)
tgt->eport += c->udp.fwd_in.delta[tgt->eport];
- if (c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr) &&
+ if (!c->no_splice && inany_is_loopback(&ini->eaddr) &&
(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
/* spliceable */
diff --git a/passt.1 b/passt.1
index b2896a2..d9cd33e 100644
--- a/passt.1
+++ b/passt.1
@@ -695,6 +695,11 @@ Configure MAC address \fIaddr\fR on the tap interface in the namespace.
Default is to let the tap driver build a pseudorandom hardware address.
+.TP
+.BR \-\-no-splice
+Disable the bypass path for inbound, local traffic. See the section \fBHandling
+of local traffic in pasta\fR in the \fBNOTES\fR for more details.
+
.SH EXAMPLES
.SS \fBpasta
diff --git a/passt.h b/passt.h
index c038630..0dd4efa 100644
--- a/passt.h
+++ b/passt.h
@@ -229,6 +229,7 @@ struct ip6_ctx {
* @no_dhcpv6: Disable DHCPv6 server
* @no_ndp: Disable NDP handler altogether
* @no_ra: Disable router advertisements
+ * @no_splice: Disable socket splicing for inbound traffic
* @host_lo_to_ns_lo: Map host loopback addresses to ns loopback addresses
* @freebind: Allow binding of non-local addresses for forwarding
* @low_wmem: Low probed net.core.wmem_max
@@ -291,6 +292,7 @@ struct ctx {
int no_dhcpv6;
int no_ndp;
int no_ra;
+ int no_splice;
int host_lo_to_ns_lo;
int freebind;
--
2.47.1
[View Less]
During testing it is sometimes useful to force traffic which would
normally be forwarded by socket splicing through the tap interface.
In this commit, we add a command switch enabling such functionality
for inbound local traffic.
For outbound local traffic this is much trickier, if even possible,
so leave that for a later commit.
Suggested-by: David Gibson <david(a)gibson.dropbear.id.au>
Signed-off-by: Jon Maloy <jmaloy(a)redhat.com>
---
v2: Some minor changes based on feedback from PASST team
v3: More changes based on feedback from D. Gibson and S. Brivio
-Moved new option to pasta-only section
-Added description to man-page
v4: -Changed test on (mode == PASST) to (mode != PASTA) as
suggested by Stefano Brivio.
---
conf.c | 7 ++++++-
fwd.c | 2 +-
passt.1 | 4 ++++
passt.h | 2 ++
4 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/conf.c b/conf.c
index eaa7d99..97d8beb 100644
--- a/conf.c
+++ b/conf.c
@@ -977,7 +977,8 @@ pasta_opts:
" Don't copy all routes to namespace\n"
" --no-copy-addrs DEPRECATED:\n"
" Don't copy all addresses to namespace\n"
- " --ns-mac-addr ADDR Set MAC address on tap interface\n");
+ " --ns-mac-addr ADDR Set MAC address on tap interface\n"
+ " --no-splice Disable inbound socket splicing\n");
exit(status);
}
@@ -1319,6 +1320,7 @@ void conf(struct ctx *c, int argc, char **argv)
{"no-dhcpv6", no_argument, &c->no_dhcpv6, 1 },
{"no-ndp", no_argument, &c->no_ndp, 1 },
{"no-ra", no_argument, &c->no_ra, 1 },
+ {"no-splice", no_argument, &c->no_splice, 1 },
{"freebind", no_argument, &c->freebind, 1 },
{"no-map-gw", no_argument, &no_map_gw, 1 },
{"ipv4-only", no_argument, NULL, '4' },
@@ -1756,6 +1758,9 @@ void conf(struct ctx *c, int argc, char **argv)
}
} while (name != -1);
+ if (c->mode != MODE_PASTA)
+ c->no_splice = 1;
+
if (c->mode == MODE_PASTA && !c->pasta_conf_ns) {
if (copy_routes_opt)
die("--no-copy-routes needs --config-net");
diff --git a/fwd.c b/fwd.c
index 0b7f8b1..2829cd2 100644
--- a/fwd.c
+++ b/fwd.c
@@ -443,7 +443,7 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
else if (proto == IPPROTO_UDP)
tgt->eport += c->udp.fwd_in.delta[tgt->eport];
- if (c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr) &&
+ if (!c->no_splice && inany_is_loopback(&ini->eaddr) &&
(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
/* spliceable */
diff --git a/passt.1 b/passt.1
index b2896a2..c8a5783 100644
--- a/passt.1
+++ b/passt.1
@@ -695,6 +695,10 @@ Configure MAC address \fIaddr\fR on the tap interface in the namespace.
Default is to let the tap driver build a pseudorandom hardware address.
+.TP
+.BR \-\-no-splice
+Disable socket splicing for host to NS traffic.
+
.SH EXAMPLES
.SS \fBpasta
diff --git a/passt.h b/passt.h
index c038630..0dd4efa 100644
--- a/passt.h
+++ b/passt.h
@@ -229,6 +229,7 @@ struct ip6_ctx {
* @no_dhcpv6: Disable DHCPv6 server
* @no_ndp: Disable NDP handler altogether
* @no_ra: Disable router advertisements
+ * @no_splice: Disable socket splicing for inbound traffic
* @host_lo_to_ns_lo: Map host loopback addresses to ns loopback addresses
* @freebind: Allow binding of non-local addresses for forwarding
* @low_wmem: Low probed net.core.wmem_max
@@ -291,6 +292,7 @@ struct ctx {
int no_dhcpv6;
int no_ndp;
int no_ra;
+ int no_splice;
int host_lo_to_ns_lo;
int freebind;
--
2.45.2
[View Less]
During testing it is sometimes useful to force traffic which would
normally be forwarded by socket splicing through the tap interface.
In this commit, we add a command switch enabling such functionality
for inbound local traffic.
For outbound local traffic this is much trickier, if even possible,
so leave that for a later commit.
Suggested-by: David Gibson <david(a)gibson.dropbear.id.au>
Signed-off-by: Jon Maloy <jmaloy(a)redhat.com>
---
v2: Some minor changes based on feedback from PASST team
v3: More changes based on feedback from D. Gibson and S. Brivio
-Moved new option to pasta-only section
-Added description to man-page
---
conf.c | 7 ++++++-
fwd.c | 2 +-
passt.1 | 4 ++++
passt.h | 2 ++
4 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/conf.c b/conf.c
index eaa7d99..53f6770 100644
--- a/conf.c
+++ b/conf.c
@@ -977,7 +977,8 @@ pasta_opts:
" Don't copy all routes to namespace\n"
" --no-copy-addrs DEPRECATED:\n"
" Don't copy all addresses to namespace\n"
- " --ns-mac-addr ADDR Set MAC address on tap interface\n");
+ " --ns-mac-addr ADDR Set MAC address on tap interface\n"
+ " --no-splice Disable inbound socket splicing\n");
exit(status);
}
@@ -1319,6 +1320,7 @@ void conf(struct ctx *c, int argc, char **argv)
{"no-dhcpv6", no_argument, &c->no_dhcpv6, 1 },
{"no-ndp", no_argument, &c->no_ndp, 1 },
{"no-ra", no_argument, &c->no_ra, 1 },
+ {"no-splice", no_argument, &c->no_splice, 1 },
{"freebind", no_argument, &c->freebind, 1 },
{"no-map-gw", no_argument, &no_map_gw, 1 },
{"ipv4-only", no_argument, NULL, '4' },
@@ -1756,6 +1758,9 @@ void conf(struct ctx *c, int argc, char **argv)
}
} while (name != -1);
+ if (c->mode == MODE_PASST)
+ c->no_splice = 1;
+
if (c->mode == MODE_PASTA && !c->pasta_conf_ns) {
if (copy_routes_opt)
die("--no-copy-routes needs --config-net");
diff --git a/fwd.c b/fwd.c
index 0b7f8b1..2829cd2 100644
--- a/fwd.c
+++ b/fwd.c
@@ -443,7 +443,7 @@ uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
else if (proto == IPPROTO_UDP)
tgt->eport += c->udp.fwd_in.delta[tgt->eport];
- if (c->mode == MODE_PASTA && inany_is_loopback(&ini->eaddr) &&
+ if (!c->no_splice && inany_is_loopback(&ini->eaddr) &&
(proto == IPPROTO_TCP || proto == IPPROTO_UDP)) {
/* spliceable */
diff --git a/passt.1 b/passt.1
index b2896a2..c8a5783 100644
--- a/passt.1
+++ b/passt.1
@@ -695,6 +695,10 @@ Configure MAC address \fIaddr\fR on the tap interface in the namespace.
Default is to let the tap driver build a pseudorandom hardware address.
+.TP
+.BR \-\-no-splice
+Disable socket splicing for host to NS traffic.
+
.SH EXAMPLES
.SS \fBpasta
diff --git a/passt.h b/passt.h
index c038630..0dd4efa 100644
--- a/passt.h
+++ b/passt.h
@@ -229,6 +229,7 @@ struct ip6_ctx {
* @no_dhcpv6: Disable DHCPv6 server
* @no_ndp: Disable NDP handler altogether
* @no_ra: Disable router advertisements
+ * @no_splice: Disable socket splicing for inbound traffic
* @host_lo_to_ns_lo: Map host loopback addresses to ns loopback addresses
* @freebind: Allow binding of non-local addresses for forwarding
* @low_wmem: Low probed net.core.wmem_max
@@ -291,6 +292,7 @@ struct ctx {
int no_dhcpv6;
int no_ndp;
int no_ra;
+ int no_splice;
int host_lo_to_ns_lo;
int freebind;
--
2.45.2
[View Less]
Well, that's the impetus, but it's done as a more general changing of
where we sanity check flow endpoints.
Link: https://bugs.passt.top/show_bug.cgi?id=105
David Gibson (2):
udp: Improve detail of UDP endpoint sanity checking
flow: Remove over-zealous sanity checks in flow_sidx_hash()
flow.c | 7 +------
udp_flow.c | 32 ++++++++++++++++++++++++--------
2 files changed, 25 insertions(+), 14 deletions(-)
--
2.47.0
This series of patches adds vhost-user support to passt
and then allows passt to connect to QEMU network backend using
virtqueue rather than a socket.
With QEMU, rather than connecting with:
-netdev stream,id=s,server=off,addr.type=unix,addr.path=/tmp/passt_1.socket
we will use:
-chardev socket,id=chr0,path=/tmp/passt_1.socket
-netdev vhost-user,id=netdev0,chardev=chr0
-device virtio-net,netdev=netdev0
-object memory-backend-memfd,id=memfd0,share=on,size=$RAMSIZE
-numa node,memdev=memfd0
The memory backend is needed to share data between passt and QEMU.
Performance comparison between "-netdev stream" and "-netdev vhost-user":
$ iperf3 -c localhost -p 10001 -t 60 -6 -u -b 50G
socket:
[ 5] 0.00-60.05 sec 95.6 GBytes 13.7 Gbits/sec 0.017 ms 6998988/10132413 (69%) receiver
vhost-user:
[ 5] 0.00-60.04 sec 237 GBytes 33.9 Gbits/sec 0.006 ms 53673/7813770 (0.69%) receiver
$ iperf3 -c localhost -p 10001 -t 60 -4 -u -b 50G
socket:
[ 5] 0.00-60.05 sec 98.9 GBytes 14.1 Gbits/sec 0.018 ms 6260735/9501832 (66%) receiver
vhost-user:
[ 5] 0.00-60.05 sec 235 GBytes 33.7 Gbits/sec 0.008 ms 37581/7752699 (0.48%) receiver
$ iperf3 -c localhost -p 10001 -t 60 -6
socket:
[ 5] 0.00-60.00 sec 17.3 GBytes 2.48 Gbits/sec 0 sender
[ 5] 0.00-60.06 sec 17.3 GBytes 2.48 Gbits/sec receiver
vhost-user:
[ 5] 0.00-60.00 sec 191 GBytes 27.4 Gbits/sec 0 sender
[ 5] 0.00-60.05 sec 191 GBytes 27.3 Gbits/sec receiver
$ iperf3 -c localhost -p 10001 -t 60 -4
socket:
[ 5] 0.00-60.00 sec 15.6 GBytes 2.24 Gbits/sec 0 sender
[ 5] 0.00-60.06 sec 15.6 GBytes 2.24 Gbits/sec receiver
vhost-user:
[ 5] 0.00-60.00 sec 189 GBytes 27.1 Gbits/sec 0 sender
[ 5] 0.00-60.04 sec 189 GBytes 27.0 Gbits/sec receiver
v14:
- merge "tcp_vu: Share more header construction between IPv4 and IPv6 paths"
into "vhost-user: add vhost-user" to simplify rework
- address comments from David
- in tcp_vu.c, use an array to point to the iovecs that contain headers
v13:
- fix TCP big file transfer test with SO_PEEK_OFF
v12:
- rebase
- address comments from Stefano
v11:
- rebase
- address comments from David on v10
v10:
- rebase v9 on top of my changes
- remove last 4 patches from v9 as
'tcp: Pass TCP header and payload separately to tcp_fill_headers[46]()'
introduces a regression in "make check" for me.
- addressed comments from David
- I tried to cleanup iov management, but I was not able to remove the
update of the first iov to point to the header or to the data
- udp_vu_hdrlen()/tcp_vu_hdrlen() now include the size of the
virtio-net header
- I didn't address comments from Stefano.
- I didn't fix the bug with seek_offset_cap
v9: [David Gibson]
- Rebased on current main
- Conflicts with v4/v6 buffer merge addressed
- Conflicts with TCP options construction rework addressed
- Added several cleanup patches on top
- Added a number of IOV and buffer cleanups on top
- The aim is that these should allow more sharing of logic between
the vhost-user and non-vhost-user paths, although they've only
minimally accomplished that so far.
v8:
- remove iov_size() from vu_collect_one_frame()
- move vu_packet_check_range() to vu_common.c
- fix UDP when dlen is 0.
v7:
- rebase
- use vu_collect_one_frame() to do vu_collect() (collect multiple frame)
- add vhost-user tests from Stefano
v6:
- rebase
- extract 3 patches from "vhost-user: add vhost-user":
passt: rename tap_sock_init() to tap_backend_init()
tcp: Export headers functions
udp: Prepare udp.c to be shared with vhost-user
- introduce new functions vu_collect_one_frame(),
vu_collect(), vu_set_vnethdr(), vu_flush(), vu_send_single()
to be called from tcp_vu.c, udp_vu.c and ICMP/DHCP where vhost-user
code was duplicated.
v5:
- rebase on top of 2024_09_06.6b38f07
- rework udp_vu.c as ref.udp.v6 has been removed and we need to
know if we receive IPv4 or IPv6 frame when we prepare the
guest buffers for recvmsg()
- remove vnet->hdrlen as the size is always the same with virtio-net v1
- address comments from David and Stefano
v4:
- rebase on top of 2024_08_21.1d6142f
(rebasing on top of 620e19a1b48a ("udp: Merge udp[46]_mh_recv arrays")
introduces a regression in the UDP latency measurement,
because I think I don't correctly replace ref.udp.v6, which is removed
by this commit)
- Addressed most of the comments from David and Stefano
(I didn't want to postpone this version to next week,
so I'll address the remaining comments in the next version).
v3:
- rebase on top of flow table
- update tcp_vu.c to look like udp_vu.c (recv()/prepare()/send_frame())
- address comments from Stefano and David on version 2
v2:
- remove PATCH 4
- rewrite PATCH 2 and 3 to follow passt coding style
- move some code from PATCH 3 to PATCH 4 (previously PATCH 5)
- partially addressed David's comment on PATCH 5
David Gibson (1):
tcp: Move tcp_l2_buf_fill_headers() to tcp_buf.c
Laurent Vivier (7):
packet: replace struct desc by struct iovec
vhost-user: introduce virtio API
vhost-user: introduce vhost-user API
udp: Prepare udp.c to be shared with vhost-user
tcp: Export headers functions
passt: rename tap_sock_init() to tap_backend_init()
vhost-user: add vhost-user
Stefano Brivio (1):
test: Add tests for passt in vhost-user mode
Makefile | 9 +-
conf.c | 19 +-
epoll_type.h | 4 +
iov.c | 1 -
isolation.c | 17 +-
packet.c | 91 ++--
packet.h | 22 +-
passt.1 | 10 +-
passt.c | 11 +-
passt.h | 7 +
pcap.c | 1 -
tap.c | 129 ++++--
tap.h | 7 +-
tcp.c | 76 +---
tcp_buf.c | 39 +-
tcp_internal.h | 19 +-
tcp_vu.c | 493 +++++++++++++++++++++
tcp_vu.h | 12 +
test/lib/perf_report | 15 +
test/lib/setup | 77 +++-
test/lib/setup_ugly | 2 +-
test/passt_vu | 1 +
test/passt_vu_in_ns | 1 +
test/perf/passt_vu_tcp | 211 +++++++++
test/perf/passt_vu_udp | 159 +++++++
test/run | 25 ++
test/two_guests_vu | 1 +
udp.c | 85 ++--
udp_internal.h | 34 ++
udp_vu.c | 343 ++++++++++++++
udp_vu.h | 13 +
util.h | 9 +
vhost_user.c | 981 +++++++++++++++++++++++++++++++++++++++++
vhost_user.h | 206 +++++++++
virtio.c | 645 +++++++++++++++++++++++++++
virtio.h | 184 ++++++++
vu_common.c | 282 ++++++++++++
vu_common.h | 60 +++
38 files changed, 4099 insertions(+), 202 deletions(-)
create mode 100644 tcp_vu.c
create mode 100644 tcp_vu.h
create mode 120000 test/passt_vu
create mode 120000 test/passt_vu_in_ns
create mode 100644 test/perf/passt_vu_tcp
create mode 100644 test/perf/passt_vu_udp
create mode 120000 test/two_guests_vu
create mode 100644 udp_internal.h
create mode 100644 udp_vu.c
create mode 100644 udp_vu.h
create mode 100644 vhost_user.c
create mode 100644 vhost_user.h
create mode 100644 virtio.c
create mode 100644 virtio.h
create mode 100644 vu_common.c
create mode 100644 vu_common.h
--
2.47.0
[View Less]