Currently icmp_id_map[][] stores information about ping sockets in a bespoke structure. Move the same information into a new type of flow that lives in the flow table. To match that change, replace the existing ICMP timer with a flow-based timer for expiring ping sockets. This has the advantage that we only need to scan the active flows, not all possible ids. For now we still index the flow table entries with icmp_id_map[][], and we don't populate all the common flow information. This is kind of an abuse of the flow table, but it's a useful interim step towards full flow table integration for ICMP. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- Makefile | 6 +-- flow.c | 9 ++++ flow.h | 4 ++ flow_table.h | 2 + icmp.c | 127 +++++++++++++++++++++++---------------------------- icmp.h | 2 +- icmp_flow.h | 31 +++++++++++++ passt.c | 5 -- 8 files changed, 106 insertions(+), 80 deletions(-) create mode 100644 icmp_flow.h diff --git a/Makefile b/Makefile index af4fa87..610ab96 100644 --- a/Makefile +++ b/Makefile @@ -54,9 +54,9 @@ SRCS = $(PASST_SRCS) $(QRAP_SRCS) MANPAGES = passt.1 pasta.1 qrap.1 PASST_HEADERS = arch.h arp.h checksum.h conf.h dhcp.h dhcpv6.h flow.h \ - flow_table.h icmp.h inany.h isolation.h lineread.h log.h ndp.h \ - netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h siphash.h \ - tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h + flow_table.h icmp.h icmp_flow.h inany.h isolation.h lineread.h log.h \ + ndp.h netlink.h packet.h passt.h pasta.h pcap.h pif.h port_fwd.h \ + siphash.h tap.h tcp.h tcp_conn.h tcp_splice.h udp.h util.h HEADERS = $(PASST_HEADERS) seccomp.h C := \#include <linux/tcp.h>\nstruct tcp_info x = { .tcpi_snd_wnd = 0 }; diff --git a/flow.c b/flow.c index e4ea931..ef146dc 100644 --- a/flow.c +++ b/flow.c @@ -22,6 +22,8 @@ const char *flow_type_str[] = { [FLOW_TYPE_NONE] = "<none>", [FLOW_TCP] = "TCP connection", [FLOW_TCP_SPLICE] = "TCP connection (spliced)", + [FLOW_PING4] = "ICMP ping sequence", + [FLOW_PING6] = "ICMPv6 ping sequence", }; static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES, "flow_type_str[] doesn't match enum flow_type"); @@ -29,6 +31,8 @@ static_assert(ARRAY_SIZE(flow_type_str) == FLOW_NUM_TYPES, const uint8_t flow_proto[] = { [FLOW_TCP] = IPPROTO_TCP, [FLOW_TCP_SPLICE] = IPPROTO_TCP, + [FLOW_PING4] = IPPROTO_ICMP, + [FLOW_PING6] = IPPROTO_ICMPV6, }; static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES, "flow_proto[] doesn't match enum flow_type"); @@ -438,6 +442,11 @@ void flow_defer_handler(const struct ctx *c, const struct timespec *now) if (!closed && timer) tcp_splice_timer(c, flow); break; + case FLOW_PING4: + case FLOW_PING6: + if (timer) + closed = icmp_ping_timer(c, flow, now); + break; default: /* Assume other flow types don't need any handling */ ; diff --git a/flow.h b/flow.h index e1aeeed..da2a629 100644 --- a/flow.h +++ b/flow.h @@ -19,6 +19,10 @@ enum flow_type { FLOW_TCP, /* A TCP connection between a host socket and ns socket */ FLOW_TCP_SPLICE, + /* ICMP echo requests from guest to host and matching replies back */ + FLOW_PING4, + /* ICMPv6 echo requests from guest to host and matching replies back */ + FLOW_PING6, FLOW_NUM_TYPES, }; diff --git a/flow_table.h b/flow_table.h index 34fc679..091b430 100644 --- a/flow_table.h +++ b/flow_table.h @@ -8,6 +8,7 @@ #define FLOW_TABLE_H #include "tcp_conn.h" +#include "icmp_flow.h" /** * struct flow_free_block - Information about a block of free entries @@ -33,6 +34,7 @@ union flow { struct flow_free_block free; struct tcp_tap_conn tcp; struct tcp_splice_conn tcp_splice; + struct icmp_ping_flow ping; }; /* Global Flow Table */ diff --git a/icmp.c b/icmp.c index d669351..27b150d 100644 --- a/icmp.c +++ b/icmp.c @@ -37,24 +37,15 @@ #include "tap.h" #include "log.h" #include "icmp.h" +#include "flow_table.h" #define ICMP_ECHO_TIMEOUT 60 /* s, timeout for ICMP socket activity */ #define ICMP_NUM_IDS (1U << 16) -/** - * struct icmp_id_sock - Tracking information for single ICMP echo identifier - * @sock: Bound socket for identifier - * @seq: Last sequence number sent to tap, host order, -1: not sent yet - * @ts: Last associated activity from tap, seconds - */ -struct icmp_id_sock { - int sock; - int seq; - time_t ts; -}; +#define PINGF(idx) (&(FLOW(idx)->ping)) /* Indexed by ICMP echo identifier */ -static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS]; +static struct icmp_ping_flow *icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS]; /** * icmp_sock_handler() - Handle new data from ICMP or ICMPv6 socket @@ -64,8 +55,8 @@ static struct icmp_id_sock icmp_id_map[IP_VERSIONS][ICMP_NUM_IDS]; */ void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref) { - struct icmp_id_sock *const id_map = af == AF_INET - ? &icmp_id_map[V4][ref.icmp.id] : &icmp_id_map[V6][ref.icmp.id]; + struct icmp_ping_flow *pingf = af == AF_INET + ? icmp_id_map[V4][ref.icmp.id] : icmp_id_map[V6][ref.icmp.id]; const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6"; char buf[USHRT_MAX]; union { @@ -80,6 +71,8 @@ void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref) if (c->no_icmp) return; + ASSERT(pingf); + n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl); if (n < 0) { warn("%s: recvfrom() error on ping socket: %s", @@ -113,10 +106,10 @@ void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref) } if (c->mode == MODE_PASTA) { - if (id_map->seq == seq) + if (pingf->seq == seq) return; - id_map->seq = seq; + pingf->seq = seq; } debug("%s: echo reply to tap, ID: %"PRIu16", seq: %"PRIu16, pname, @@ -133,16 +126,19 @@ unexpected: } /** - * icmp_ping_close() - Close out and cleanup a ping sequence + * icmp_ping_close() - Close out and cleanup a ping flow * @c: Execution context - * @id_map: id map entry of the sequence to close + * @pingf: ping flow entry to close */ -static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_map) +static void icmp_ping_close(const struct ctx *c, struct icmp_ping_flow *pingf) { - epoll_ctl(c->epollfd, EPOLL_CTL_DEL, id_map->sock, NULL); - close(id_map->sock); - id_map->sock = -1; - id_map->seq = -1; + epoll_ctl(c->epollfd, EPOLL_CTL_DEL, pingf->sock, NULL); + close(pingf->sock); + + if (pingf->f.type == FLOW_PING4) + icmp_id_map[V4][pingf->id] = NULL; + else + icmp_id_map[V6][pingf->id] = NULL; } /** @@ -152,18 +148,24 @@ static void icmp_ping_close(const struct ctx *c, struct icmp_id_sock *id_map) * @af: Address family, AF_INET or AF_INET6 * @id: ICMP id for the new sequence * - * Return: Newly opened ping socket fd, or -1 on failure + * Return: Newly opened ping flow, or NULL on failure */ -static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_map, - int af, uint16_t id) +static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, + struct icmp_ping_flow **id_map, + int af, uint16_t id) { - uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6; const char *const pname = af == AF_INET ? "ICMP" : "ICMPv6"; + uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6; union icmp_epoll_ref iref = { .id = id }; + union flow *flow = flow_alloc(); + struct icmp_ping_flow *pingf; const void *bind_addr; const char *bind_if; int s; + if (!flow) + return NULL; + if (af == AF_INET) { bind_addr = &c->ip4.addr_out; bind_if = c->ip4.ifname_out; @@ -172,7 +174,7 @@ static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_map, bind_if = c->ip6.ifname_out; } - s = sock_l4(c, af, proto, bind_addr, bind_if, 0, iref.u32); + s = sock_l4(c, af, flow_proto[flowtype], bind_addr, bind_if, 0, iref.u32); if (s < 0) { warn("Cannot open \"ping\" socket. You might need to:"); @@ -184,16 +186,23 @@ static int icmp_ping_new(const struct ctx *c, struct icmp_id_sock *id_map, if (s > FD_REF_MAX) goto cancel; - id_map->sock = s; + pingf = &flow->ping; + pingf->f.type = flowtype; + pingf->seq = -1; + pingf->sock = s; + pingf->id = id; + + *id_map = pingf; debug("%s: new socket %i for echo ID %"PRIu16, pname, s, id); - return s; + return pingf; cancel: if (s >= 0) close(s); - return -1; + flow_alloc_cancel(flow); + return NULL; } /** @@ -219,11 +228,10 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af, struct sockaddr_in6 sa6; } sa = { .sa.sa_family = af }; const socklen_t sl = af == AF_INET ? sizeof(sa.sa4) : sizeof(sa.sa6); - struct icmp_id_sock *id_map; + struct icmp_ping_flow *pingf, **id_map; uint16_t id, seq; size_t plen; void *pkt; - int s; (void)saddr; (void)pif; @@ -263,13 +271,13 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af, ASSERT(0); } - if ((s = id_map->sock) < 0) - if ((s = icmp_ping_new(c, id_map, af, id)) < 0) + if (!(pingf = *id_map)) + if (!(pingf = icmp_ping_new(c, id_map, af, id))) return 1; - id_map->ts = now->tv_sec; + pingf->ts = now->tv_sec; - if (sendto(s, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) { + if (sendto(pingf->sock, pkt, plen, MSG_NOSIGNAL, &sa.sa, sl) < 0) { debug("%s: failed to relay request to socket: %s", pname, strerror(errno)); } else { @@ -281,44 +289,21 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af, } /** - * icmp_timer_one() - Handler for timed events related to a given identifier - * @c: Execution context - * @id_map: id socket information to check timers for - * @now: Current timestamp - */ -static void icmp_timer_one(const struct ctx *c, struct icmp_id_sock *id_map, - const struct timespec *now) -{ - if (id_map->sock < 0 || now->tv_sec - id_map->ts <= ICMP_ECHO_TIMEOUT) - return; - - icmp_ping_close(c, id_map); -} - -/** - * icmp_timer() - Scan activity bitmap for identifiers with timed events + * icmp_ping_timer() - Handler for timed events related to a given flow * @c: Execution context + * @flow: flow table entry to check for timeout * @now: Current timestamp + * + * Return: true if the flow is ready to free, false otherwise */ -void icmp_timer(const struct ctx *c, const struct timespec *now) +bool icmp_ping_timer(const struct ctx *c, union flow *flow, + const struct timespec *now) { - unsigned int i; - - for (i = 0; i < ICMP_NUM_IDS; i++) { - icmp_timer_one(c, &icmp_id_map[V4][i], now); - icmp_timer_one(c, &icmp_id_map[V6][i], now); - } -} + struct icmp_ping_flow *pingf = &flow->ping; -/** - * icmp_init() - Initialise sequences in ID map to -1 (no sequence sent yet) - */ -void icmp_init(void) -{ - unsigned i; + if (now->tv_sec - pingf->ts <= ICMP_ECHO_TIMEOUT) + return false; - for (i = 0; i < ICMP_NUM_IDS; i++) { - icmp_id_map[V4][i].seq = icmp_id_map[V6][i].seq = -1; - icmp_id_map[V4][i].sock = icmp_id_map[V6][i].sock = -1; - } + icmp_ping_close(c, pingf); + return true; } diff --git a/icmp.h b/icmp.h index 0083597..2897f31 100644 --- a/icmp.h +++ b/icmp.h @@ -9,12 +9,12 @@ #define ICMP_TIMER_INTERVAL 10000 /* ms */ struct ctx; +struct icmp_ping_flow; void icmp_sock_handler(const struct ctx *c, int af, union epoll_ref ref); int icmp_tap_handler(const struct ctx *c, uint8_t pif, int af, const void *saddr, const void *daddr, const struct pool *p, const struct timespec *now); -void icmp_timer(const struct ctx *c, const struct timespec *now); void icmp_init(void); /** diff --git a/icmp_flow.h b/icmp_flow.h new file mode 100644 index 0000000..7b7f4ee --- /dev/null +++ b/icmp_flow.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright Red Hat + * Author: David Gibson <david(a)gibson.dropbear.id.au> + * + * ICMP flow tracking data structures + */ +#ifndef ICMP_FLOW_H +#define ICMP_FLOW_H + +/** + * struct icmp_ping_flow - Descriptor for a flow of ping requests/replies + * @f: Generic flow information + * @seq: Last sequence number sent to tap, host order, -1: not sent yet + * @sock: "ping" socket + * @ts: Last associated activity from tap, seconds + * @id: Guest side ICMP ping id for this flow + */ +struct icmp_ping_flow { + /* Must be first element */ + struct flow_common f; + + int seq; + int sock; + time_t ts; + uint16_t id; +}; + +bool icmp_ping_timer(const struct ctx *c, union flow *flow, + const struct timespec *now); + +#endif /* ICMP_FLOW_H */ diff --git a/passt.c b/passt.c index 44d3a0b..9191404 100644 --- a/passt.c +++ b/passt.c @@ -105,8 +105,6 @@ static void post_handler(struct ctx *c, const struct timespec *now) CALL_PROTO_HANDLER(c, now, tcp, TCP); /* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */ CALL_PROTO_HANDLER(c, now, udp, UDP); - /* NOLINTNEXTLINE(bugprone-branch-clone): intervals can be the same */ - CALL_PROTO_HANDLER(c, now, icmp, ICMP); flow_defer_handler(c, now); #undef CALL_PROTO_HANDLER @@ -290,9 +288,6 @@ int main(int argc, char **argv) if ((!c.no_udp && udp_init(&c)) || (!c.no_tcp && tcp_init(&c))) exit(EXIT_FAILURE); - if (!c.no_icmp) - icmp_init(); - proto_update_l2_buf(c.mac_guest, c.mac); if (c.ifi4 && !c.no_dhcp) -- 2.43.0