We add subscriptions to RTMGRP_LINK, RTMGRP_IPV4_IFADDR, and
RTMGRP_IPV6_IFADDR, so that we can receive notifications when link
state or addresses change on the namespace interface.
When addresses are discovered via netlink:
- We mark them as non-permanent, which means they can be modified or
deleted by subsequent events.
- We apply the prefix indicated in the notification.
- Update addr_seen to track the new address as the active one.
This provides the foundation for dynamic address monitoring,
and supports runtime network changes.
Signed-off-by: Jon Maloy
---
epoll_type.h | 2 +
netlink.c | 370 +++++++++++++++++++++++++++++++++++++++++++++++++++
netlink.h | 3 +
passt.c | 5 +
passt.h | 1 +
tap.c | 6 +-
6 files changed, 384 insertions(+), 3 deletions(-)
diff --git a/epoll_type.h b/epoll_type.h
index a90ffb6..0a16d94 100644
--- a/epoll_type.h
+++ b/epoll_type.h
@@ -46,6 +46,8 @@ enum epoll_type {
EPOLL_TYPE_REPAIR,
/* Netlink neighbour subscription socket */
EPOLL_TYPE_NL_NEIGH,
+ /* Netlink link/address subscription socket */
+ EPOLL_TYPE_NL_LINKADDR,
EPOLL_NUM_TYPES,
};
diff --git a/netlink.c b/netlink.c
index 82a2f0c..7492f17 100644
--- a/netlink.c
+++ b/netlink.c
@@ -35,6 +35,9 @@
#include "passt.h"
#include "log.h"
#include "ip.h"
+#include "tap.h"
+#include "arp.h"
+#include "ndp.h"
#include "netlink.h"
#include "epoll_ctl.h"
@@ -59,6 +62,7 @@
int nl_sock = -1;
int nl_sock_ns = -1;
static int nl_sock_neigh = -1;
+static int nl_sock_linkaddr = -1;
static int nl_seq = 1;
/**
@@ -91,6 +95,372 @@ static int nl_sock_init_do(void *arg)
return 0;
}
+/**
+ * nl_addr4_find() - Find an IPv4 address in the address array
+ * @c: Execution context
+ * @addr: Address to find
+ *
+ * Return: index if found, -1 otherwise
+ */
+static int nl_addr4_find(const struct ctx *c, const struct in_addr *addr)
+{
+ int i;
+
+ for (i = 0; i < c->ip4.addr_count; i++)
+ if (IN4_ARE_ADDR_EQUAL(&c->ip4.addrs[i].addr, addr))
+ return (int)i;
+
+ return -1;
+}
+
+/**
+ * nl_addr6_find() - Find an IPv6 address in the address array
+ * @c: Execution context
+ * @addr: Address to find
+ *
+ * Return: index if found, -1 otherwise
+ */
+static int nl_addr6_find(const struct ctx *c, const struct in6_addr *addr)
+{
+ int i;
+
+ for (i = 0; i < c->ip6.addr_count; i++)
+ if (IN6_ARE_ADDR_EQUAL(&c->ip6.addrs[i].addr, addr))
+ return (int)i;
+
+ return -1;
+}
+
+/**
+ * nl_addr4_add() - Add a discovered IPv4 address to the address array
+ * @c: Execution context
+ * @addr: Address to add
+ * @prefix_len: Prefix length
+ *
+ * Return: true if added or updated, false if array full or already permanent
+ */
+static bool nl_addr4_add(struct ctx *c, const struct in_addr *addr,
+ int prefix_len)
+{
+ int idx = nl_addr4_find(c, addr);
+
+ if (idx >= 0) {
+ /* Address exists - if permanent, don't touch; else update */
+ if (c->ip4.addrs[idx].permanent)
+ return false;
+ c->ip4.addrs[idx].prefix_len = prefix_len;
+ return true;
+ }
+
+ /* New address - add if room */
+ if (c->ip4.addr_count >= IP4_MAX_ADDRS) {
+ debug("IPv4 address array full, ignoring discovered address");
+ return false;
+ }
+
+ idx = c->ip4.addr_count++;
+ c->ip4.addrs[idx].addr = *addr;
+ c->ip4.addrs[idx].prefix_len = prefix_len;
+ c->ip4.addrs[idx].permanent = 0;
+ return true;
+}
+
+/**
+ * nl_addr6_add() - Add a discovered IPv6 address to the address array
+ * @c: Execution context
+ * @addr: Address to add
+ * @prefix_len: Prefix length
+ *
+ * Return: true if added or updated, false if array full or already permanent
+ */
+static bool nl_addr6_add(struct ctx *c, const struct in6_addr *addr,
+ int prefix_len)
+{
+ int idx = nl_addr6_find(c, addr);
+
+ if (idx >= 0) {
+ /* Address exists - if permanent, don't touch; else update */
+ if (c->ip6.addrs[idx].permanent)
+ return false;
+ c->ip6.addrs[idx].prefix_len = prefix_len;
+ return true;
+ }
+
+ /* New address - add if room */
+ if (c->ip6.addr_count >= IP6_MAX_ADDRS) {
+ debug("IPv6 address array full, ignoring discovered address");
+ return false;
+ }
+
+ idx = c->ip6.addr_count++;
+ c->ip6.addrs[idx].addr = *addr;
+ c->ip6.addrs[idx].prefix_len = prefix_len;
+ c->ip6.addrs[idx].permanent = 0;
+ return true;
+}
+
+/**
+ * nl_addr4_del() - Remove an IPv4 address from the array if not permanent
+ * @c: Execution context
+ * @addr: Address to remove
+ *
+ * Return: true if removed, false if not found or permanent
+ */
+static bool nl_addr4_del(struct ctx *c, const struct in_addr *addr)
+{
+ int i, idx = nl_addr4_find(c, addr);
+
+ if (idx < 0)
+ return false;
+
+ if (c->ip4.addrs[idx].permanent)
+ return false;
+
+ /* Shift remaining entries down */
+ c->ip4.addr_count--;
+ for (i = idx; i < c->ip4.addr_count; i++)
+ c->ip4.addrs[i] = c->ip4.addrs[i + 1];
+
+ return true;
+}
+
+/**
+ * nl_addr6_del() - Remove an IPv6 address from the array if not permanent
+ * @c: Execution context
+ * @addr: Address to remove
+ *
+ * Return: true if removed, false if not found or permanent
+ */
+static bool nl_addr6_del(struct ctx *c, const struct in6_addr *addr)
+{
+ int i, idx = nl_addr6_find(c, addr);
+
+ if (idx < 0)
+ return false;
+
+ if (c->ip6.addrs[idx].permanent)
+ return false;
+
+ /* Shift remaining entries down */
+ c->ip6.addr_count--;
+ for (i = idx; i < c->ip6.addr_count; i++)
+ c->ip6.addrs[i] = c->ip6.addrs[i + 1];
+
+ return true;
+}
+
+/**
+ * nl_linkaddr_msg_read() - Parse and log a netlink link/addr message
+ * @c: Execution context
+ * @nh: Netlink message header
+ */
+static void nl_linkaddr_msg_read(struct ctx *c, const struct nlmsghdr *nh)
+{
+ if (nh->nlmsg_type == NLMSG_DONE || nh->nlmsg_type == NLMSG_ERROR)
+ return;
+
+ if (nh->nlmsg_type == RTM_NEWLINK || nh->nlmsg_type == RTM_DELLINK) {
+ const struct ifinfomsg *ifm = NLMSG_DATA(nh);
+ struct rtattr *rta = IFLA_RTA(ifm);
+ size_t na = IFLA_PAYLOAD(nh);
+ const char *name = "?";
+ bool up = !!(ifm->ifi_flags & IFF_UP);
+ bool running = !!(ifm->ifi_flags & IFF_RUNNING);
+
+ for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type == IFLA_IFNAME) {
+ name = (const char *)RTA_DATA(rta);
+ break;
+ }
+ }
+
+ /* Update pasta interface UP state if this is our interface */
+ if (c->mode == MODE_PASTA &&
+ (unsigned int)ifm->ifi_index == c->pasta_ifi) {
+ c->pasta_ifi_up = up;
+ debug("Interface %s", up ? "UP" : "DOWN");
+ }
+
+ if (nh->nlmsg_type == RTM_NEWLINK)
+ debug("Link %s (idx=%d): %s %s", name, ifm->ifi_index,
+ up ? "UP" : "DOWN", running ? "RUNNING" : "");
+ else
+ debug("Link %s (idx=%d): DELETED", name, ifm->ifi_index);
+
+ return;
+ }
+
+ if (nh->nlmsg_type == RTM_NEWADDR || nh->nlmsg_type == RTM_DELADDR) {
+ bool is_new = (nh->nlmsg_type == RTM_NEWADDR);
+ const struct ifaddrmsg *ifa = NLMSG_DATA(nh);
+ char addr_str[INET6_ADDRSTRLEN];
+ struct rtattr *rta = IFA_RTA(ifa);
+ char ifname[IFNAMSIZ] = { 0 };
+ size_t na = IFA_PAYLOAD(nh);
+ void *addr = NULL;
+
+ for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+ if (ifa->ifa_family == AF_INET &&
+ rta->rta_type == IFA_LOCAL) {
+ addr = RTA_DATA(rta);
+ break;
+ } else if (ifa->ifa_family == AF_INET6 &&
+ rta->rta_type == IFA_ADDRESS) {
+ addr = RTA_DATA(rta);
+ break;
+ }
+ }
+
+ if (!addr)
+ return;
+
+ if_indextoname(ifa->ifa_index, ifname);
+ inet_ntop(ifa->ifa_family, addr, addr_str, sizeof(addr_str));
+
+ debug("%s addr on %s (index=%d): %s/%i%s",
+ is_new ? "NEW" : "DEL", ifname, ifa->ifa_index, addr_str,
+ ifa->ifa_prefixlen,
+ tap_is_ready(c) ? " (tap UP)" : " (tap DOWN)");
+
+ /* Only handle our pasta interface */
+ if (c->mode != MODE_PASTA || ifa->ifa_index != c->pasta_ifi)
+ return;
+
+ if (ifa->ifa_family == AF_INET) {
+ struct in_addr *a = (struct in_addr *)addr;
+
+ if (!is_new) {
+ nl_addr4_del(c, a);
+ return;
+ }
+
+ if (nl_addr4_add(c, a, ifa->ifa_prefixlen)) {
+ c->ip4.addr_seen = *a;
+ if (c->pasta_ifi_up && c->ifi4) {
+ debug("Sending ARP");
+ arp_send_init_req(c);
+ }
+ }
+ } else if (ifa->ifa_family == AF_INET6) {
+ struct in6_addr *a = (struct in6_addr *)addr;
+
+ if (!is_new) {
+ nl_addr6_del(c, a);
+ return;
+ }
+
+ if (nl_addr6_add(c, a,
+ ifa->ifa_prefixlen)) {
+ c->ip6.addr_seen = *a;
+ if (c->pasta_ifi_up &&
+ c->ifi6 && !c->no_ndp) {
+ debug("Sending NDP");
+ ndp_send_init_req(c);
+ }
+ }
+ }
+ }
+}
+
+/**
+ * nl_linkaddr_notify_handler() - Handle events from link/addr notifier socket
+ * @c: Execution context
+ */
+void nl_linkaddr_notify_handler(struct ctx *c)
+{
+ char buf[NLBUFSIZ];
+
+ for (;;) {
+ ssize_t n = recv(nl_sock_linkaddr, buf, sizeof(buf), MSG_DONTWAIT);
+ struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+
+ if (n < 0) {
+ if (errno == EINTR)
+ continue;
+ if (errno != EAGAIN)
+ debug("recv() error: %s", strerror_(errno));
+ break;
+ }
+
+ debug("Received %zd bytes", n);
+
+ for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n))
+ nl_linkaddr_msg_read(c, nh);
+ }
+}
+
+/**
+ * nl_linkaddr_init_do() - Actually create and bind the netlink socket
+ * @arg: Execution context (for namespace entry) or NULL
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int nl_linkaddr_init_do(void *arg)
+{
+ struct sockaddr_nl addr = { .nl_family = AF_NETLINK,
+ .nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR |
+ RTMGRP_IPV6_IFADDR };
+
+ if (arg)
+ ns_enter((struct ctx *)arg);
+
+ nl_sock_linkaddr = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+ if (nl_sock_linkaddr < 0) {
+ debug("socket() failed: %s", strerror_(errno));
+ return -1;
+ }
+
+ if (bind(nl_sock_linkaddr, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ debug("bind() failed: %s", strerror_(errno));
+ close(nl_sock_linkaddr);
+ nl_sock_linkaddr = -1;
+ return -1;
+ }
+
+ debug("socket fd=%d", nl_sock_linkaddr);
+ return 0;
+}
+
+/**
+ * nl_linkaddr_notify_init() - Initialize link/address change notifier
+ * @c: Execution context
+ *
+ * Return: 0 on success, -1 on failure
+ */
+int nl_linkaddr_notify_init(const struct ctx *c)
+{
+ union epoll_ref ref = { .type = EPOLL_TYPE_NL_LINKADDR };
+ struct epoll_event ev = { .events = EPOLLIN };
+
+ if (nl_sock_linkaddr >= 0) {
+ debug("notifier already initialized (fd=%d)", nl_sock_linkaddr);
+ return 0;
+ }
+
+ /* Open the notifier socket in the namespace for pasta mode,
+ * or in the init namespace otherwise.
+ */
+ if (c->mode == MODE_PASTA)
+ NS_CALL(nl_linkaddr_init_do, (void *)c);
+ else
+ nl_linkaddr_init_do(NULL);
+
+ if (nl_sock_linkaddr < 0) {
+ warn("Failed to create/bind link/addr notifier socket");
+ return -1;
+ }
+
+ ev.data.u64 = ref.u64;
+ if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, nl_sock_linkaddr, &ev) == -1) {
+ warn("epoll_ctl() failed on link/addr notifier socket: %s",
+ strerror_(errno));
+ close(nl_sock_linkaddr);
+ nl_sock_linkaddr = -1;
+ return -1;
+ }
+
+ return 0;
+}
/**
* nl_sock_init() - Call nl_sock_init_do(), won't return on failure
* @c: Execution context
diff --git a/netlink.h b/netlink.h
index 8f1e9b9..1796a72 100644
--- a/netlink.h
+++ b/netlink.h
@@ -33,4 +33,7 @@ int nl_link_set_flags(int s, unsigned int ifi,
int nl_neigh_notify_init(const struct ctx *c);
void nl_neigh_notify_handler(const struct ctx *c);
+int nl_linkaddr_notify_init(const struct ctx *c);
+void nl_linkaddr_notify_handler(struct ctx *c);
+
#endif /* NETLINK_H */
diff --git a/passt.c b/passt.c
index 5ed88d0..f274858 100644
--- a/passt.c
+++ b/passt.c
@@ -80,6 +80,7 @@ char *epoll_type_str[] = {
[EPOLL_TYPE_REPAIR_LISTEN] = "TCP_REPAIR helper listening socket",
[EPOLL_TYPE_REPAIR] = "TCP_REPAIR helper socket",
[EPOLL_TYPE_NL_NEIGH] = "netlink neighbour notifier socket",
+ [EPOLL_TYPE_NL_LINKADDR] = "netlink link/address notifier socket",
};
static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
"epoll_type_str[] doesn't match enum epoll_type");
@@ -304,6 +305,9 @@ static void passt_worker(void *opaque, int nfds, struct epoll_event *events)
case EPOLL_TYPE_NL_NEIGH:
nl_neigh_notify_handler(c);
break;
+ case EPOLL_TYPE_NL_LINKADDR:
+ nl_linkaddr_notify_handler(c);
+ break;
default:
/* Can't happen */
ASSERT(0);
@@ -413,6 +417,7 @@ int main(int argc, char **argv)
fwd_neigh_table_init(&c);
nl_neigh_notify_init(&c);
+ nl_linkaddr_notify_init(&c);
if (!c.foreground) {
if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0)
diff --git a/passt.h b/passt.h
index 533f2cb..70ccaf1 100644
--- a/passt.h
+++ b/passt.h
@@ -264,6 +264,7 @@ struct ctx {
char pasta_ifn[IF_NAMESIZE];
unsigned int pasta_ifi;
int pasta_conf_ns;
+ int pasta_ifi_up; /* Namespace interface is UP */
int no_tcp;
struct tcp_ctx tcp;
diff --git a/tap.c b/tap.c
index 0b96cc1..a2a4459 100644
--- a/tap.c
+++ b/tap.c
@@ -1363,10 +1363,10 @@ bool tap_is_ready(const struct ctx *c)
return false;
if (c->mode == MODE_PASTA) {
- /* If pasta_conf_ns is set, the interface was configured and
- * brought up during initialization. If not, it's still down.
+ /* Check if the namespace interface is actually UP.
+ * This is tracked by netlink link notifications.
*/
- return c->pasta_conf_ns;
+ return c->pasta_ifi_up;
}
return true;
--
2.51.1