Now that ICMP pass-through from socket-to-tap is in place, it is easy to support UDP-based traceroute functionality in the tap-to-socket direction. We add that support in this commit. Signed-off-by: Jon Maloy <jmaloy(a)redhat.com> --- v2: - Using ancillary data instead of setsockopt to transfer outgoing TTL. - Support IPv6 v3: - Storing ttl per packet instead of per flow. This may not be elegant, but it is much less intrusive than changing the flow criteria. This eliminates the need for the extra, flow-changing patch we introduced in v2. --- packet.c | 28 +++++++++++++++++----------- packet.h | 30 ++++++++++++++++++++++-------- tap.c | 3 ++- udp.c | 28 ++++++++++++++++++++++++---- udp.h | 3 ++- 5 files changed, 67 insertions(+), 25 deletions(-) diff --git a/packet.c b/packet.c index 72c6158..36a32fe 100644 --- a/packet.c +++ b/packet.c @@ -89,11 +89,12 @@ bool pool_full(const struct pool *p) * @p: Existing pool * @len: Length of new descriptor * @start: Start of data + * @ttl: TTL/hop_limit for this packet * @func: For tracing: name of calling function * @line: For tracing: caller line of function call */ void packet_add_do(struct pool *p, size_t len, const char *start, - const char *func, int line) + const uint8_t ttl, const char *func, int line) { size_t idx = p->count; @@ -106,8 +107,9 @@ void packet_add_do(struct pool *p, size_t len, const char *start, if (packet_check_range(p, start, len, func, line)) return; - p->pkt[idx].iov_base = (void *)start; - p->pkt[idx].iov_len = len; + p->pkt[idx].iov.iov_base = (void *)start; + p->pkt[idx].iov.iov_len = len; + p->pkt[idx].ttl = ttl; p->count++; } @@ -125,7 +127,8 @@ void packet_add_do(struct pool *p, size_t len, const char *start, * Return: pointer to start of data range, NULL on invalid range or descriptor */ void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset, - size_t len, size_t *left, const char *func, int line) + size_t len, size_t *left, uint8_t *ttl, + const char *func, int line) { char *ptr; @@ -139,18
+142,21 @@ void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset, return NULL; } - if (offset > p->pkt[idx].iov_len || - len > (p->pkt[idx].iov_len - offset)) + if (offset > p->pkt[idx].iov.iov_len || + len > (p->pkt[idx].iov.iov_len - offset)) return NULL; - ptr = (char *)p->pkt[idx].iov_base + offset; + ptr = (char *)p->pkt[idx].iov.iov_base + offset; ASSERT_WITH_MSG(!packet_check_range(p, ptr, len, func, line), "Corrupt packet pool, %s:%i", func, line); if (left) - *left = p->pkt[idx].iov_len - offset - len; + *left = p->pkt[idx].iov.iov_len - offset - len; + if (ttl) + *ttl = p->pkt[idx].ttl; +; return ptr; } @@ -168,14 +174,14 @@ void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset, */ void *packet_get_do(const struct pool *p, const size_t idx, size_t offset, size_t len, size_t *left, - const char *func, int line) + uint8_t *ttl, const char *func, int line) { - void *r = packet_get_try_do(p, idx, offset, len, left, func, line); + void *r = packet_get_try_do(p, idx, offset, len, left, ttl, func, line); if (!r) { trace("missing packet data length %zu, offset %zu from " "length %zu, %s:%i", - len, offset, p->pkt[idx].iov_len, func, line); + len, offset, p->pkt[idx].iov.iov_len, func, line); } return r; diff --git a/packet.h b/packet.h index c94780a..1f5142c 100644 --- a/packet.h +++ b/packet.h @@ -11,6 +11,8 @@ /* Maximum size of a single packet stored in pool, including headers */ #define PACKET_MAX_LEN ((size_t)UINT16_MAX) +#define DEFAULT_TTL 64 + /** * struct pool - Generic pool of packets stored in a buffer * @buf: Buffer storing packet descriptors, @@ -26,28 +28,36 @@ struct pool { size_t buf_size; size_t size; size_t count; - struct iovec pkt[]; + struct { + struct iovec iov; + uint8_t ttl; + uint8_t pad[3]; + } pkt[]; }; int vu_packet_check_range(void *buf, const char *ptr, size_t len); void packet_add_do(struct pool *p, size_t len, const char *start, - const char *func, int line); + const uint8_t ttl, const char 
*func, int line); void *packet_get_try_do(const struct pool *p, const size_t idx, size_t offset, size_t len, size_t *left, - const char *func, int line); + uint8_t *ttl, const char *func, int line); void *packet_get_do(const struct pool *p, const size_t idx, size_t offset, size_t len, size_t *left, - const char *func, int line); + uint8_t *ttl, const char *func, int line); bool pool_full(const struct pool *p); void pool_flush(struct pool *p); #define packet_add(p, len, start) \ - packet_add_do(p, len, start, __func__, __LINE__) + packet_add_do(p, len, start, DEFAULT_TTL, __func__, __LINE__) +#define packet_add_ttl(p, len, start, ttl) \ + packet_add_do(p, len, start, ttl, __func__, __LINE__) #define packet_get_try(p, idx, offset, len, left) \ - packet_get_try_do(p, idx, offset, len, left, __func__, __LINE__) + packet_get_try_do(p, idx, offset, len, left, NULL, __func__, __LINE__) #define packet_get(p, idx, offset, len, left) \ - packet_get_do(p, idx, offset, len, left, __func__, __LINE__) + packet_get_do(p, idx, offset, len, left, NULL, __func__, __LINE__) +#define packet_get_ttl(p, idx, offset, len, left, ttl) \ + packet_get_do(p, idx, offset, len, left, ttl, __func__, __LINE__) #define PACKET_POOL_DECL(_name, _size, _buf) \ struct _name ## _t { \ @@ -55,7 +65,11 @@ struct _name ## _t { \ size_t buf_size; \ size_t size; \ size_t count; \ - struct iovec pkt[_size]; \ + struct { \ + struct iovec iov; \ + uint8_t ttl; \ + uint8_t pad[3]; \ + } pkt[_size]; \ } #define PACKET_POOL_INIT_NOCAST(_size, _buf, _buf_size) \ diff --git a/tap.c b/tap.c index 3a6fcbe..ac9b3df 100644 --- a/tap.c +++ b/tap.c @@ -563,6 +563,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf); * @dest: Destination port * @saddr: Source address * @daddr: Destination address + * @ttl: TTL/hop_limit for packet * @msg: Array of messages that can be handled in a single call */ static struct tap4_l4_t { @@ -821,7 +822,7 @@ resume: #undef L4_SET append: - packet_add((struct pool *)&seq->p, l4len, l4h); + 
packet_add_ttl((struct pool *)&seq->p, l4len, l4h, iph->ttl); } for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) { diff --git a/udp.c b/udp.c index 39431d7..5fbba49 100644 --- a/udp.c +++ b/udp.c @@ -859,8 +859,10 @@ fail: */ int udp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, - const struct pool *p, int idx, const struct timespec *now) + const struct pool *p, int idx, + const struct timespec *now) { + char ancillary[CMSG_SPACE(sizeof(int))]; const struct flowside *toside; struct mmsghdr mm[UIO_MAXIOV]; union sockaddr_inany to_sa; @@ -885,7 +887,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, src = ntohs(uh->source); dst = ntohs(uh->dest); - tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now); + tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, + src, dst, now); + if (!(uflow = udp_at_sidx(tosidx))) { char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN]; @@ -915,8 +919,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, for (i = 0; i < (int)p->count - idx; i++) { struct udphdr *uh_send; size_t len; + uint8_t ttl; - uh_send = packet_get(p, idx + i, 0, sizeof(*uh), &len); + uh_send = packet_get_ttl(p, idx + i, 0, sizeof(*uh), &len, &ttl); if (!uh_send) return p->count - idx; @@ -926,7 +931,6 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, if (len) { m[i].iov_base = (char *)(uh_send + 1); m[i].iov_len = len; - mm[i].msg_hdr.msg_iov = m + i; mm[i].msg_hdr.msg_iovlen = 1; } else { @@ -938,6 +942,22 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, mm[i].msg_hdr.msg_controllen = 0; mm[i].msg_hdr.msg_flags = 0; + if (ttl != DEFAULT_TTL) { + struct cmsghdr *cmsg = (void *) ancillary; + + if (af == AF_INET) { + cmsg->cmsg_level = IPPROTO_IP; + cmsg->cmsg_type = IP_TTL; + } else { + cmsg->cmsg_level = IPPROTO_IPV6; + cmsg->cmsg_type = IPV6_HOPLIMIT; + } + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + *((int *) CMSG_DATA(cmsg)) = ttl; + mm[i].msg_hdr.msg_control = 
ancillary; + mm[i].msg_hdr.msg_controllen = sizeof(ancillary); + } + count++; } diff --git a/udp.h b/udp.h index de2df6d..6adbfcd 100644 --- a/udp.h +++ b/udp.h @@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now); int udp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, - const struct pool *p, int idx, const struct timespec *now); + const struct pool *p, int idx, + const struct timespec *now); int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr, const char *ifname, in_port_t port); int udp_init(struct ctx *c); -- 2.48.1