The solution to bug https://bugs.passt.top/show_bug.cgi?id=120
requires the ability to translate from an IP address to its
corresponding MAC address in cases where those are present in
the ARP or NDP tables.
To keep track of the contents of these tables we add a netlink
based neighbour subscription feature.
Signed-off-by: Jon Maloy
---
v3: - Added an attribute containing NDA_DST to the sent message, so
that we let the kernel do the filtering of the IP address
and return only one entry.
- Added interface index to the call signature. Since the only
interface we know is the template interface, this limits
the number of hosts that will be seen as 'network segment
local' from a PASST viewpoint.
v4: - Made loop independent of attribute order.
- Ignoring L2 addresses which are not of size ETH_ALEN.
v5: - Changed return value of new function, so caller can know if
a MAC address really was found.
v6: - Removed warning printout which had ended up in the wrong
commit.
v8: - Changed to neighbour event subscription model
- netlink: arp/ndp table subscription
v10:- Updated according to David's latest comments on v8
- Added functionality where we initially read the current
state of the ARP/NDP tables
v12:- Updates based on feedback from David and Stefano
v13:- Updates based on feedback from David and Stefano
v14:- Updates based on feedback from David
v15:- Updates based on feedback from Stefano
---
epoll_type.h | 2 +
netlink.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++-
netlink.h | 4 +
passt.c | 7 ++
4 files changed, 211 insertions(+), 3 deletions(-)
diff --git a/epoll_type.h b/epoll_type.h
index 12ac59b..a90ffb6 100644
--- a/epoll_type.h
+++ b/epoll_type.h
@@ -44,6 +44,8 @@ enum epoll_type {
EPOLL_TYPE_REPAIR_LISTEN,
/* TCP_REPAIR helper socket */
EPOLL_TYPE_REPAIR,
+ /* Netlink neighbour subscription socket */
+ EPOLL_TYPE_NL_NEIGH,
EPOLL_NUM_TYPES,
};
diff --git a/netlink.c b/netlink.c
index 9fe70f2..2896e23 100644
--- a/netlink.c
+++ b/netlink.c
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include
#include
@@ -40,6 +41,10 @@
#define RTNH_NEXT_AND_DEC(rtnh, attrlen) \
((attrlen) -= RTNH_ALIGN((rtnh)->rtnh_len), RTNH_NEXT(rtnh))
+/* Convenience macro borrowed from kernel */
+#define NUD_VALID \
+ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE)
+
/* Netlink expects a buffer of at least 8kiB or the system page size,
* whichever is larger. 32kiB is recommended for more efficient.
* Since the largest page size on any remotely common Linux setup is
@@ -50,9 +55,10 @@
#define NLBUFSIZ 65536
/* Socket in init, in target namespace, sequence (just needs to be monotonic) */
-int nl_sock = -1;
-int nl_sock_ns = -1;
-static int nl_seq = 1;
+int nl_sock = -1;
+int nl_sock_ns = -1;
+static int nl_sock_neigh = -1;
+static int nl_seq = 1;
/**
* nl_sock_init_do() - Set up netlink sockets in init or target namespace
@@ -1103,3 +1109,192 @@ int nl_link_set_flags(int s, unsigned int ifi,
return nl_do(s, &req, RTM_NEWLINK, 0, sizeof(req));
}
+
+/**
+ * nl_neigh_msg_read() - Parse and trace a neighbour message from netlink
+ * @c: Execution context, supplies interface indices ifi4 and ifi6
+ * @nh: Message to be read; types other than neighbour or error are ignored
+ */
+static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh)
+{
+ struct ndmsg *ndm = NLMSG_DATA(nh);
+ struct rtattr *rta = (struct rtattr *)(ndm + 1);
+ size_t na = NLMSG_PAYLOAD(nh, sizeof(*ndm));
+ char ip_str[INET6_ADDRSTRLEN];
+ char mac_str[ETH_ADDRSTRLEN];
+ const uint8_t *lladdr = NULL;
+ const void *dst = NULL;
+ size_t lladdr_len = 0;
+ union inany_addr addr;
+ size_t dstlen = 0;
+
+ if (nh->nlmsg_type == NLMSG_DONE) /* Nothing to parse */
+ return;
+
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ const struct nlmsgerr *errmsg = (struct nlmsgerr *)ndm;
+
+ warn("netlink error message on neighbour notifier: %s",
+ strerror_(-errmsg->error));
+ return;
+ }
+
+ if (nh->nlmsg_type != RTM_NEWNEIGH && nh->nlmsg_type != RTM_DELNEIGH)
+ return;
+
+ for (; RTA_OK(rta, na); rta = RTA_NEXT(rta, na)) {
+ if (rta->rta_type == NDA_DST) { /* Neighbour IP address */
+ dst = RTA_DATA(rta);
+ dstlen = RTA_PAYLOAD(rta);
+ } else if (rta->rta_type == NDA_LLADDR) { /* Link-layer addr */
+ lladdr = RTA_DATA(rta);
+ lladdr_len = RTA_PAYLOAD(rta);
+ }
+ }
+
+ if (!dst) /* No NDA_DST attribute found */
+ return;
+
+ if (ndm->ndm_family == AF_INET && ndm->ndm_ifindex != c->ifi4)
+ return; /* IPv4 entry on another interface */
+
+ if (ndm->ndm_family == AF_INET6 && ndm->ndm_ifindex != c->ifi6)
+ return; /* IPv6 entry on another interface */
+
+ if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
+ return;
+
+ if (ndm->ndm_family == AF_INET && dstlen != sizeof(struct in_addr)) {
+ warn("netlink: wrong address length in AF_INET notification");
+ return;
+ }
+ if (ndm->ndm_family == AF_INET6 && dstlen != sizeof(struct in6_addr)) {
+ warn("netlink: wrong address length in AF_INET6 notification");
+ return;
+ }
+ inany_from_af(&addr, ndm->ndm_family, dst);
+ inany_ntop(&addr, ip_str, sizeof(ip_str));
+
+ if (nh->nlmsg_type == RTM_DELNEIGH) { /* Removal: trace only */
+ trace("neighbour notifier delete: %s", ip_str);
+ return;
+ }
+ if (!(ndm->ndm_state & NUD_VALID)) { /* No NUD_VALID bit set */
+ trace("neighbour notifier: %s unreachable, state: 0x%02x",
+ ip_str, ndm->ndm_state);
+ return;
+ }
+ if (!lladdr) {
+ warn("RTM_NEWNEIGH %s: missing link layer address", ip_str);
+ return;
+ }
+ if (lladdr_len != ETH_ALEN || ndm->ndm_type != ARPHRD_ETHER)
+ return; /* NOTE(review): is ndm_type ARPHRD_* or RTN_*? confirm */
+
+ eth_ntop(lladdr, mac_str, sizeof(mac_str));
+ trace("neighbour notifier update: %s / %s", ip_str, mac_str);
+}
+
+/**
+ * nl_neigh_sync() - Request a neighbour table dump, read all replies
+ * @c: Execution context, passed on to nl_neigh_msg_read()
+ * @proto: Address family set in the request, AF_INET or AF_INET6
+ * @ifi: Interface index set in the request
+ */
+static void nl_neigh_sync(const struct ctx *c, int proto, int ifi)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct ndmsg ndm;
+ } req = {
+ .ndm.ndm_family = proto,
+ .ndm.ndm_ifindex = ifi,
+ };
+ struct nlmsghdr *nh;
+ char buf[NLBUFSIZ];
+ ssize_t status;
+ uint32_t seq;
+
+ seq = nl_send(nl_sock_neigh, &req, RTM_GETNEIGH, /* Notifier socket */
+ NLM_F_DUMP, sizeof(req));
+ nl_foreach_oftype(nh, status, nl_sock_neigh, buf, seq, RTM_NEWNEIGH)
+ nl_neigh_msg_read(c, nh);
+ if (status < 0) /* Negated error code, failure is only warned about */
+ warn("netlink: RTM_GETNEIGH failed: %s", strerror_(-status));
+}
+
+/**
+ * nl_neigh_notify_handler() - Non-blocking drain of pending neighbour updates
+ * @c: Execution context, passed on to nl_neigh_msg_read()
+ */
+void nl_neigh_notify_handler(const struct ctx *c)
+{
+ char buf[NLBUFSIZ];
+
+ for (;;) { /* Left only via return on a recv() error */
+ ssize_t n = recv(nl_sock_neigh, buf, sizeof(buf), MSG_DONTWAIT);
+ struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+
+ if (n < 0) {
+ if (errno == EINTR) /* Interrupted: retry */
+ continue;
+ if (errno != EAGAIN) /* EAGAIN ends the drain quietly */
+ warn_perror("netlink notifier read error");
+ return;
+ }
+ for (; NLMSG_OK(nh, n); nh = NLMSG_NEXT(nh, n))
+ nl_neigh_msg_read(c, nh); /* One call per message */
+ }
+}
+
+/**
+ * nl_neigh_notify_init() - Subscribe to neighbour events, read initial state
+ * @c: Execution context, supplies epollfd, ifi4 and ifi6
+ *
+ * Return: 0 on success or if the socket already exists, -1 on failure
+ */
+int nl_neigh_notify_init(const struct ctx *c)
+{
+ union epoll_ref ref = {
+ .type = EPOLL_TYPE_NL_NEIGH
+ };
+ struct epoll_event ev = {
+ .events = EPOLLIN
+ };
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ .nl_groups = RTMGRP_NEIGH, /* Neighbour multicast group */
+ };
+
+ if (nl_sock_neigh >= 0) { /* Already set up: warn, don't fail */
+ warn("netlink: neighbour notifier socket already exists");
+ return 0;
+ }
+
+ nl_sock_neigh = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+ NETLINK_ROUTE);
+ if (nl_sock_neigh < 0) {
+ warn_perror("Failed to create neighbour notifier socket");
+ return -1;
+ }
+
+ if (bind(nl_sock_neigh, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ warn_perror("Failed to bind neighbour notifier socket");
+ close(nl_sock_neigh);
+ nl_sock_neigh = -1; /* Mark as unset again */
+ return -1;
+ }
+
+ ev.data.u64 = ref.u64;
+ if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, nl_sock_neigh, &ev) == -1) {
+ warn_perror("epoll_ctl() on neighbour notifier socket failed");
+ close(nl_sock_neigh);
+ nl_sock_neigh = -1; /* Mark as unset again */
+ return -1;
+ }
+
+ nl_neigh_sync(c, AF_INET, c->ifi4); /* Sync runs after subscribing */
+ nl_neigh_sync(c, AF_INET6, c->ifi6);
+
+ return 0;
+}
diff --git a/netlink.h b/netlink.h
index b51e99c..8f1e9b9 100644
--- a/netlink.h
+++ b/netlink.h
@@ -17,6 +17,8 @@ int nl_route_dup(int s_src, unsigned int ifi_src,
int s_dst, unsigned int ifi_dst, sa_family_t af);
int nl_addr_get(int s, unsigned int ifi, sa_family_t af,
void *addr, int *prefix_len, void *addr_l);
+bool nl_neigh_mac_get(int s, const union inany_addr *addr, int ifi,
+ unsigned char *mac);
int nl_addr_set(int s, unsigned int ifi, sa_family_t af,
const void *addr, int prefix_len);
int nl_addr_get_ll(int s, unsigned int ifi, struct in6_addr *addr);
@@ -28,5 +30,7 @@ int nl_link_set_mac(int s, unsigned int ifi, const void *mac);
int nl_link_set_mtu(int s, unsigned int ifi, int mtu);
int nl_link_set_flags(int s, unsigned int ifi,
unsigned int set, unsigned int change);
+int nl_neigh_notify_init(const struct ctx *c);
+void nl_neigh_notify_handler(const struct ctx *c);
#endif /* NETLINK_H */
diff --git a/passt.c b/passt.c
index bdb7b69..a10e469 100644
--- a/passt.c
+++ b/passt.c
@@ -53,6 +53,7 @@
#include "vu_common.h"
#include "migrate.h"
#include "repair.h"
+#include "netlink.h"
#define NUM_EPOLL_EVENTS 8
@@ -79,6 +80,7 @@ char *epoll_type_str[] = {
[EPOLL_TYPE_VHOST_KICK] = "vhost-user kick socket",
[EPOLL_TYPE_REPAIR_LISTEN] = "TCP_REPAIR helper listening socket",
[EPOLL_TYPE_REPAIR] = "TCP_REPAIR helper socket",
+ [EPOLL_TYPE_NL_NEIGH] = "netlink neighbour notifier socket",
};
static_assert(ARRAY_SIZE(epoll_type_str) == EPOLL_NUM_TYPES,
"epoll_type_str[] doesn't match enum epoll_type");
@@ -322,6 +324,8 @@ int main(int argc, char **argv)
pcap_init(&c);
+ nl_neigh_notify_init(&c);
+
if (!c.foreground) {
if ((devnull_fd = open("/dev/null", O_RDWR | O_CLOEXEC)) < 0)
die_perror("Failed to open /dev/null");
@@ -414,6 +418,9 @@ loop:
case EPOLL_TYPE_REPAIR:
repair_handler(&c, eventmask);
break;
+ case EPOLL_TYPE_NL_NEIGH:
+ nl_neigh_notify_handler(&c);
+ break;
default:
/* Can't happen */
ASSERT(0);
--
2.50.1