We add a cache table to keep track of the contents of the kernel ARP
and NDP tables. The table is fed from the just introduced netlink based
neighbour subscription function.
Signed-off-by: Jon Maloy
---
v5: - Moved to earlier in series to reduce rebase conflicts
v6: - Squashed the hash list commit and the FIFO/LRU queue commit
- Removed hash lookup. We now only use linear lookup in a
linked list
- Eliminated dynamic memory allocation.
- Ensured there is only one call to clock_gettime()
- Using MAC_ZERO instead of the previously dedicated definitions
v7: - NOW using MAC_ZERO where needed
- I am still using linear back-off for empty cache entries. Even
an incoming, flow-creating packet from a local host gives no
guarantee that its MAC address is in the ARP table, so we must
allow for a few new attempts at first possible occasions. Only
after several failed lookups can we conclude that we probably
never will succeed. Hence the back-off.
- Fixed a bug that David inadvertently made me aware of: I only
intended to set the initial expiry value to MAC_CACHE_RENEWAL
when an ARP/NDP table lookup was successful.
- Improved struct and function description comments.
v8: - Total re-design of table, adapting to the new, subscription
based way of updating it.
v9: - Catering for MAC address change for an existing host.
v10: - Changes according to feedback from David Gibson
v12: - Changes according to feedback from David and Stefano
- Added dummy entries for loopback and default GW addresses
v13: - Changes according to feedback and discussions with David
and Stefano
v14: - Moved the call to nat_inbound() to a much more sensible
place in netlink.c, as suggested by David.
v15: - Changes according to feedback from David and Stefano
- Removed 'blocker' entry for guest_gw.
- Let blocker entry for ctx->map_guest_addr use our_mac_addr
- We let fwd_neigh_mac_get() do a secondary lookup in the
table, using the guest_gw address, when the primary lookup
fails. This way, we always return the correct gateway MAC
address if there happens to be a real host presenting that
address.
---
fwd.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
fwd.h | 7 ++
netlink.c | 12 +++-
passt.c | 1 +
4 files changed, 220 insertions(+), 2 deletions(-)
diff --git a/fwd.c b/fwd.c
index 250cf56..93d83e3 100644
--- a/fwd.c
+++ b/fwd.c
@@ -26,6 +26,7 @@
#include "passt.h"
#include "lineread.h"
#include "flow_table.h"
+#include "netlink.h"
/* Empheral port range: values from RFC 6335 */
static in_port_t fwd_ephemeral_min = (1 << 15) + (1 << 14);
@@ -33,6 +34,207 @@ static in_port_t fwd_ephemeral_max = NUM_PORTS - 1;
#define PORT_RANGE_SYSCTL "/proc/sys/net/ipv4/ip_local_port_range"
+#define NEIGH_TABLE_SLOTS 1024 /* Length of the hash slot array */
+#define NEIGH_TABLE_SIZE (NEIGH_TABLE_SLOTS / 2) /* Length of the entry pool */
+static_assert((NEIGH_TABLE_SLOTS & (NEIGH_TABLE_SLOTS - 1)) == 0,
+ "NEIGH_TABLE_SLOTS must be a power of two");
+
+/**
+ * struct neigh_table_entry - Entry in the ARP/NDP table
+ * @next: Next entry in the same hash slot chain, or in the free list
+ * @addr: IP address of represented host, compared against the lookup key
+ * @mac: MAC address of represented host
+ * @permanent: Entry cannot be altered or freed by notification
+ */
+struct neigh_table_entry {
+ struct neigh_table_entry *next;
+ union inany_addr addr;
+ uint8_t mac[ETH_ALEN];
+ bool permanent;
+};
+
+/**
+ * struct neigh_table - Cache of ARP/NDP table contents
+ * @entries: Fixed pool of entries, each linked into either @free or @slots
+ * @slots: Hash table slots, each the head of a singly linked entry chain
+ * @free: Singly linked list of unused entries from @entries
+ */
+struct neigh_table {
+ struct neigh_table_entry entries[NEIGH_TABLE_SIZE];
+ struct neigh_table_entry *slots[NEIGH_TABLE_SLOTS];
+ struct neigh_table_entry *free;
+};
+
+static struct neigh_table neigh_table;
+
+/**
+ * neigh_table_slot() - Hash key to a slot index of the neighbour table
+ * @c: Execution context, providing the hash secret
+ * @key: The key to be used for the hash
+ *
+ * Return: slot index in the range [0, NEIGH_TABLE_SLOTS), so all slots are used
+ */
+static size_t neigh_table_slot(const struct ctx *c,
+			       const union inany_addr *key)
+{
+	struct siphash_state st = SIPHASH_INIT(c->hash_secret);
+	uint32_t i;
+
+	inany_siphash_feed(&st, key);
+	i = siphash_final(&st, sizeof(*key), 0);
+
+	return ((size_t)i) & (NEIGH_TABLE_SLOTS - 1);
+}
+
+/**
+ * fwd_neigh_table_find() - Look up a neighbour table entry by IP address
+ * @c: Execution context
+ * @addr: Neighbour address to be used as key for the lookup
+ *
+ * Return: pointer to the entry holding @addr, NULL if there is none
+ */
+static struct neigh_table_entry *fwd_neigh_table_find(const struct ctx *c,
+						      const union inany_addr *addr)
+{
+	struct neigh_table_entry *e;
+
+	for (e = neigh_table.slots[neigh_table_slot(c, addr)]; e; e = e->next)
+		if (inany_equals(&e->addr, addr))
+			return e;
+
+	return NULL;
+}
+
+/**
+ * fwd_neigh_table_update() - Allocate or update neighbour table entry
+ * @c: Execution context
+ * @addr: IP address used to determine insertion slot and store in entry
+ * @mac: The MAC address associated with the neighbour address
+ * @permanent: New entry cannot be altered or freed; unused for existing ones
+ */
+void fwd_neigh_table_update(const struct ctx *c, const union inany_addr *addr,
+			    const uint8_t *mac, bool permanent)
+{
+	struct neigh_table *t = &neigh_table;
+	struct neigh_table_entry *e;
+	size_t slot;
+
+	/* Known address: only refresh the MAC, and only if not permanent */
+	e = fwd_neigh_table_find(c, addr);
+	if (e) {
+		if (!e->permanent)
+			memcpy(e->mac, mac, ETH_ALEN);
+		return;
+	}
+
+	e = t->free;
+	if (!e) {
+		debug("Failed to allocate neighbour table entry");
+		return;
+	}
+	t->free = e->next;
+	slot = neigh_table_slot(c, addr);
+	e->next = t->slots[slot];
+	t->slots[slot] = e;
+
+	memcpy(&e->addr, addr, sizeof(*addr));
+	memcpy(e->mac, mac, ETH_ALEN);
+	e->permanent = permanent;
+}
+
+/**
+ * fwd_neigh_table_free() - Remove an entry from a slot and add it to free list
+ * @c: Execution context
+ * @addr: IP address used to find the slot for the entry
+ *
+ * Permanent entries, and addresses without an entry, are left untouched.
+ */
+void fwd_neigh_table_free(const struct ctx *c, const union inany_addr *addr)
+{
+	size_t slot = neigh_table_slot(c, addr);
+	struct neigh_table *t = &neigh_table;
+	struct neigh_table_entry *e, **prev;
+
+	/* @prev is the link pointing at @e: unlinking needs no special cases */
+	for (prev = &t->slots[slot]; (e = *prev); prev = &e->next)
+		if (inany_equals(&e->addr, addr))
+			break;
+
+	if (!e || e->permanent)
+		return;
+
+	*prev = e->next;
+	e->next = t->free;
+	t->free = e;
+	memset(&e->addr, 0, sizeof(e->addr));
+	memset(e->mac, 0, ETH_ALEN);
+}
+
+/**
+ * fwd_neigh_mac_get() - Find the MAC address to use for a neighbour address
+ * @c: Execution context
+ * @addr: Neighbour IP address used as lookup key
+ * @mac: Buffer for returned MAC address, ETH_ALEN bytes are written
+ */
+void fwd_neigh_mac_get(const struct ctx *c, const union inany_addr *addr,
+		       uint8_t *mac)
+{
+	const struct neigh_table_entry *e = fwd_neigh_table_find(c, addr);
+
+	if (!e) {
+		union inany_addr gw;
+
+		/* Unknown address: retry with the guest gateway, same family */
+		if (!inany_v4(addr))
+			gw.a6 = c->ip6.guest_gw;
+		else
+			gw = inany_from_v4(c->ip4.guest_gw);
+		e = fwd_neigh_table_find(c, &gw);
+	}
+
+	if (!e)
+		memcpy(mac, c->our_tap_mac, ETH_ALEN);
+	else
+		memcpy(mac, e->mac, ETH_ALEN);
+}
+
+/**
+ * fwd_neigh_table_init() - Initialize the neighbour table
+ * @c: Execution context
+ */
+void fwd_neigh_table_init(const struct ctx *c)
+{
+	union inany_addr mhl = inany_from_v4(c->ip4.map_host_loopback);
+	union inany_addr mga = inany_from_v4(c->ip4.map_guest_addr);
+	struct neigh_table *t = &neigh_table;
+	struct neigh_table_entry *e;
+	size_t i;
+
+	memset(t, 0, sizeof(*t));
+
+	for (i = 0; i < NEIGH_TABLE_SIZE; i++) {
+		e = &t->entries[i];
+		e->next = t->free;
+		t->free = e;
+	}
+
+	/* Blocker entries to stop events from hosts using these addresses */
+	if (!inany_is_unspecified4(&mhl))
+		fwd_neigh_table_update(c, &mhl, c->our_tap_mac, true);
+
+	if (!inany_is_unspecified4(&mga))
+		fwd_neigh_table_update(c, &mga, c->our_tap_mac, true);
+
+	mhl.a6 = c->ip6.map_host_loopback;
+	mga.a6 = c->ip6.map_guest_addr;
+
+	if (!inany_is_unspecified6(&mhl))
+		fwd_neigh_table_update(c, &mhl, c->our_tap_mac, true);
+
+	if (!inany_is_unspecified6(&mga))
+		fwd_neigh_table_update(c, &mga, c->our_tap_mac, true);
+}
+
/** fwd_probe_ephemeral() - Determine what ports this host considers ephemeral
*
* Work out what ports the host thinks are emphemeral and record it for later
diff --git a/fwd.h b/fwd.h
index 65c7c96..352f3b5 100644
--- a/fwd.h
+++ b/fwd.h
@@ -56,5 +56,12 @@ uint8_t fwd_nat_from_splice(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt);
uint8_t fwd_nat_from_host(const struct ctx *c, uint8_t proto,
const struct flowside *ini, struct flowside *tgt);
+void fwd_neigh_table_update(const struct ctx *c, const union inany_addr *addr,
+ const uint8_t *mac, bool permanent);
+void fwd_neigh_table_free(const struct ctx *c,
+ const union inany_addr *addr);
+void fwd_neigh_mac_get(const struct ctx *c, const union inany_addr *addr,
+ uint8_t *mac);
+void fwd_neigh_table_init(const struct ctx *c);
#endif /* FWD_H */
diff --git a/netlink.c b/netlink.c
index 2896e23..ee7466a 100644
--- a/netlink.c
+++ b/netlink.c
@@ -1123,9 +1123,9 @@ static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh)
char ip_str[INET6_ADDRSTRLEN];
char mac_str[ETH_ADDRSTRLEN];
const uint8_t *lladdr = NULL;
+ union inany_addr addr, daddr;
const void *dst = NULL;
size_t lladdr_len = 0;
- union inany_addr addr;
size_t dstlen = 0;
if (nh->nlmsg_type == NLMSG_DONE)
@@ -1172,16 +1172,23 @@ static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh)
warn("netlink: wrong address length in AF_INET6 notification");
return;
}
+
+ /* We only handle guest-side visible addresses */
inany_from_af(&addr, ndm->ndm_family, dst);
- inany_ntop(&addr, ip_str, sizeof(ip_str));
+ if (!nat_inbound(c, &addr, &daddr))
+ return;
+
+ inany_ntop(&daddr, ip_str, sizeof(ip_str));
if (nh->nlmsg_type == RTM_DELNEIGH) {
trace("neighbour notifier delete: %s", ip_str);
+ fwd_neigh_table_free(c, &daddr);
return;
}
if (!(ndm->ndm_state & NUD_VALID)) {
trace("neighbour notifier: %s unreachable, state: 0x%02x",
ip_str, ndm->ndm_state);
+ fwd_neigh_table_free(c, &daddr);
return;
}
if (!lladdr) {
@@ -1193,6 +1200,7 @@ static void nl_neigh_msg_read(const struct ctx *c, struct nlmsghdr *nh)
eth_ntop(lladdr, mac_str, sizeof(mac_str));
trace("neighbour notifier update: %s / %s", ip_str, mac_str);
+ fwd_neigh_table_update(c, &daddr, lladdr, false);
}
/**
diff --git a/passt.c b/passt.c
index a10e469..24e4348 100644
--- a/passt.c
+++ b/passt.c
@@ -324,6 +324,7 @@ int main(int argc, char **argv)
pcap_init(&c);
+ fwd_neigh_table_init(&c);
nl_neigh_notify_init(&c);
if (!c.foreground) {
--
2.50.1