At the moment these take separate pointers to the tap specific and IP headers, but expect the TCP header and payload as a single tcp_payload_t. As well as being slightly inconsistent, this involves some slightly iffy pointer shenanigans when called on the flags path with a tcp_flags_t instead of a tcp_payload_t. More importantly, it's inconvenient for the upcoming vhost-user case, where the TCP header and payload might not be contiguous. Furthermore, the payload itself might not be contiguous. So, pass the TCP header as its own pointer, and the TCP payload as an IO vector. Signed-off-by: David Gibson <david(a)gibson.dropbear.id.au> --- tcp.c | 56 +++++++++++++++++++++------------------------ tcp_buf.c | 17 +++++++------- tcp_internal.h | 6 +++-- tcp_vu.c | 61 ++++++++++++++++++++++++++------------------------ 4 files changed, 71 insertions(+), 69 deletions(-) diff --git a/tcp.c b/tcp.c index cf395c8..5e26243 100644 --- a/tcp.c +++ b/tcp.c @@ -916,21 +916,25 @@ static void tcp_fill_header(struct tcphdr *th, * @conn: Connection pointer * @taph: tap backend specific header * @iph: Pointer to IPv4 header - * @bp: Pointer to TCP header followed by TCP payload - * @dlen: TCP payload length + * @th: Pointer to TCP header + * @iov: IO vector containing payload + * @iov_cnt: Number of entries in @iov + * @doffset: Offset of the TCP payload within @iov * @check: Checksum, if already known * @seq: Sequence number for this segment * @no_tcp_csum: Do not set TCP checksum */ void tcp_fill_headers4(const struct tcp_tap_conn *conn, struct tap_hdr *taph, struct iphdr *iph, - struct tcp_payload_t *bp, size_t dlen, + struct tcphdr *th, + const struct iovec *iov, size_t iov_cnt, size_t doffset, const uint16_t *check, uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); const struct in_addr *src4 = inany_v4(&tapside->oaddr); const struct in_addr *dst4 = inany_v4(&tapside->eaddr); - size_t l4len = dlen + sizeof(bp->th); + size_t dlen = iov_size(iov, iov_cnt) - doffset; + size_t l4len = dlen + sizeof(*th); size_t l3len = l4len + sizeof(*iph); ASSERT(src4 && dst4); @@ -942,18 +946,12 @@ void tcp_fill_headers4(const struct tcp_tap_conn *conn, iph->check = check ? *check : csum_ip4_header(l3len, IPPROTO_TCP, *src4, *dst4); - tcp_fill_header(&bp->th, conn, seq); + tcp_fill_header(th, conn, seq); - if (no_tcp_csum) { - bp->th.check = 0; - } else { - const struct iovec iov = { - .iov_base = bp->data, - .iov_len = dlen, - }; - - tcp_update_check_tcp4(iph, &bp->th, &iov, 1, 0); - } + if (no_tcp_csum) + th->check = 0; + else + tcp_update_check_tcp4(iph, th, iov, iov_cnt, doffset); tap_hdr_update(taph, l3len + sizeof(struct ethhdr)); } @@ -963,19 +961,23 @@ void tcp_fill_headers4(const struct tcp_tap_conn *conn, * @conn: Connection pointer * @taph: tap backend specific header * @ip6h: Pointer to IPv6 header - * @bp: Pointer to TCP header followed by TCP payload - * @dlen: TCP payload length + * @th: Pointer to TCP header + * @iov: IO vector containing payload + * @iov_cnt: Number of entries in @iov + * @doffset: Offset of the TCP payload within @iov * @check: Checksum, if already known * @seq: Sequence number for this segment * @no_tcp_csum: Do not set TCP checksum */ void tcp_fill_headers6(const struct tcp_tap_conn *conn, struct tap_hdr *taph, struct ipv6hdr *ip6h, - struct tcp_payload_t *bp, size_t dlen, + struct tcphdr *th, + const struct iovec *iov, size_t iov_cnt, size_t doffset, uint32_t seq, bool no_tcp_csum) { const struct flowside *tapside = TAPFLOW(conn); - size_t l4len = dlen + sizeof(bp->th); + size_t dlen = iov_size(iov, iov_cnt) - doffset; + size_t l4len = dlen + sizeof(*th); ip6h->payload_len = htons(l4len); ip6h->saddr = tapside->oaddr.a6; @@ -989,18 +991,12 @@ void tcp_fill_headers6(const struct tcp_tap_conn *conn, ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff; ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff; - tcp_fill_header(&bp->th, conn, seq); + tcp_fill_header(th, conn, seq); - if (no_tcp_csum) { - bp->th.check = 0; - } else { - const struct iovec iov = { - .iov_base = bp->data, - .iov_len = dlen, - }; - - tcp_update_check_tcp6(ip6h, &bp->th, &iov, 1, 0); - } + if (no_tcp_csum) + th->check = 0; + else + tcp_update_check_tcp6(ip6h, th, iov, iov_cnt, doffset); tap_hdr_update(taph, l4len + sizeof(*ip6h) + sizeof(struct ethhdr)); } diff --git a/tcp_buf.c b/tcp_buf.c index 52a19ed..0e6b67d 100644 --- a/tcp_buf.c +++ b/tcp_buf.c @@ -190,28 +190,29 @@ void tcp_payload_flush(const struct ctx *c) * tcp_buf_fill_headers() - Fill 802.3, IP, TCP headers in pre-cooked buffers * @conn: Connection pointer * @iov: Pointer to an array of iovec of TCP pre-cooked buffers - * @dlen: TCP payload length * @check: Checksum, if already known * @seq: Sequence number for this segment * @no_tcp_csum: Do not set TCP checksum */ static void tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn, - struct iovec *iov, size_t dlen, - const uint16_t *check, uint32_t seq, - bool no_tcp_csum) + struct iovec *iov, const uint16_t *check, + uint32_t seq, bool no_tcp_csum) { + const struct iovec *tail = &iov[TCP_IOV_PAYLOAD]; const struct flowside *tapside = TAPFLOW(conn); const struct in_addr *a4 = inany_v4(&tapside->oaddr); if (a4) { tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, - iov[TCP_IOV_PAYLOAD].iov_base, dlen, + iov[TCP_IOV_PAYLOAD].iov_base, + tail, 1, sizeof(struct tcphdr), check, seq, no_tcp_csum); } else { tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base, iov[TCP_IOV_IP].iov_base, - iov[TCP_IOV_PAYLOAD].iov_base, dlen, + iov[TCP_IOV_PAYLOAD].iov_base, + tail, 1, sizeof(struct tcphdr), seq, no_tcp_csum); } } @@ -252,7 +253,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) tcp_flags_used++; l4len = optlen + sizeof(struct tcphdr); iov[TCP_IOV_PAYLOAD].iov_len = l4len; - tcp_l2_buf_fill_headers(conn, iov, optlen, NULL, seq, false); + tcp_l2_buf_fill_headers(conn, iov, NULL, seq, false); if (flags & DUP_ACK) { struct iovec *dup_iov; @@ -304,7 +305,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn, iov[TCP_IOV_ETH].iov_base = &tcp6_eth_src; } iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr); - tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq, false); + tcp_l2_buf_fill_headers(conn, iov, check, seq, false); if (++tcp_payload_used > TCP_FRAMES_MEM - 1) tcp_payload_flush(c); } diff --git a/tcp_internal.h b/tcp_internal.h index 934fe9e..8f9267c 100644 --- a/tcp_internal.h +++ b/tcp_internal.h @@ -185,11 +185,13 @@ void tcp_update_check_tcp6(const struct ipv6hdr *ip6h, struct tcphdr *th, size_t doffset); void tcp_fill_headers4(const struct tcp_tap_conn *conn, struct tap_hdr *taph, struct iphdr *iph, - struct tcp_payload_t *bp, size_t dlen, + struct tcphdr *th, + const struct iovec *iov, size_t iov_cnt, size_t doffset, const uint16_t *check, uint32_t seq, bool no_tcp_csum); void tcp_fill_headers6(const struct tcp_tap_conn *conn, struct tap_hdr *taph, struct ipv6hdr *ip6h, - struct tcp_payload_t *bp, size_t dlen, + struct tcphdr *th, + const struct iovec *iov, size_t iov_cnt, size_t doffset, uint32_t seq, bool no_tcp_csum); int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, diff --git a/tcp_vu.c b/tcp_vu.c index 5073d67..bf45c74 100644 --- a/tcp_vu.c +++ b/tcp_vu.c @@ -97,9 +97,10 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; const struct flowside *tapside = TAPFLOW(conn); size_t l2len, optlen, hdrlen; - struct tcp_flags_t *payload; struct ipv6hdr *ip6h = NULL; + struct tcp_syn_opts *opts; struct iphdr *iph = NULL; + struct tcphdr *th; struct ethhdr *eh; uint32_t seq; int elem_cnt; @@ -129,23 +130,23 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) iph = vu_ip(iov_vu[0].iov_base); *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP); - payload = vu_payloadv4(iov_vu[0].iov_base); + th = vu_payloadv4(iov_vu[0].iov_base); } else { eh->h_proto = htons(ETH_P_IPV6); ip6h = vu_ip(iov_vu[0].iov_base); *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP); - payload = vu_payloadv6(iov_vu[0].iov_base); + th = vu_payloadv6(iov_vu[0].iov_base); } - memset(&payload->th, 0, sizeof(payload->th)); - payload->th.doff = offsetof(struct tcp_flags_t, opts) / 4; - payload->th.ack = 1; + memset(th, 0, sizeof(*th)); + th->doff = offsetof(struct tcp_flags_t, opts) / 4; + th->ack = 1; seq = conn->seq_to_tap; - ret = tcp_prepare_flags(c, conn, flags, &payload->th, - &payload->opts, &optlen); + opts = (struct tcp_syn_opts *)(th + 1); + ret = tcp_prepare_flags(c, conn, flags, th, opts, &optlen); if (ret <= 0) { vu_queue_rewind(vq, 1); return ret; @@ -156,12 +157,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) sizeof(struct virtio_net_hdr_mrg_rxbuf); if (CONN_V4(conn)) { - tcp_fill_headers4(conn, NULL, iph, - (struct tcp_payload_t *)payload, optlen, + tcp_fill_headers4(conn, NULL, iph, th, iov_vu, 1, + (char *)opts - (char *)iov_vu[0].iov_base, NULL, seq, true); } else { - tcp_fill_headers6(conn, NULL, ip6h, - (struct tcp_payload_t *)payload, optlen, + tcp_fill_headers6(conn, NULL, ip6h, th, iov_vu, 1, + (char *)opts - (char *)iov_vu[0].iov_base, seq, true); } @@ -265,20 +266,21 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, * tcp_vu_prepare() - Prepare the packet header * @c: Execution context * @conn: Connection pointer - * @first: Pointer to the array of IO vectors - * @dlen: Packet data length + * @iov: Pointer to the array of IO vectors + * @iov_cnt: Number of entries in @iov * @check: Checksum, if already known */ -static void tcp_vu_prepare(const struct ctx *c, - struct tcp_tap_conn *conn, struct iovec *first, - size_t dlen, const uint16_t **check) +static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn, + struct iovec *iov, size_t iov_cnt, + const uint16_t **check) { const struct flowside *toside = TAPFLOW(conn); - struct tcp_payload_t *payload; - char *base = first->iov_base; + char *base = iov[0].iov_base; struct ipv6hdr *ip6h = NULL; struct iphdr *iph = NULL; + struct tcphdr *th; struct ethhdr *eh; + char *data; /* we guess the first iovec provided by the guest can embed * all the headers needed by L2 frame @@ -292,7 +294,7 @@ static void tcp_vu_prepare(const struct ctx *c, /* initialize header */ if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) { - ASSERT(first[0].iov_len >= sizeof(struct virtio_net_hdr_mrg_rxbuf) + + ASSERT(iov[0].iov_len >= sizeof(struct virtio_net_hdr_mrg_rxbuf) + sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct tcphdr)); @@ -300,9 +302,9 @@ static void tcp_vu_prepare(const struct ctx *c, iph = vu_ip(base); *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_TCP); - payload = vu_payloadv4(base); + th = vu_payloadv4(base); } else { - ASSERT(first[0].iov_len >= sizeof(struct virtio_net_hdr_mrg_rxbuf) + + ASSERT(iov[0].iov_len >= sizeof(struct virtio_net_hdr_mrg_rxbuf) + sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)); @@ -311,19 +313,20 @@ static void tcp_vu_prepare(const struct ctx *c, ip6h = vu_ip(base); *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_TCP); - payload = vu_payloadv6(base); + th = vu_payloadv6(base); } - memset(&payload->th, 0, sizeof(payload->th)); - payload->th.doff = offsetof(struct tcp_payload_t, data) / 4; - payload->th.ack = 1; + memset(th, 0, sizeof(*th)); + th->doff = offsetof(struct tcp_payload_t, data) / 4; + th->ack = 1; + data = (char *)(th + 1); if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) { - tcp_fill_headers4(conn, NULL, iph, payload, dlen, + tcp_fill_headers4(conn, NULL, iph, th, iov, iov_cnt, data - base, *check, conn->seq_to_tap, true); *check = &iph->check; } else { - tcp_fill_headers6(conn, NULL, ip6h, payload, dlen, + tcp_fill_headers6(conn, NULL, ip6h, th, iov, iov_cnt, data - base, conn->seq_to_tap, true); } } @@ -440,7 +443,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) /* restore first iovec base: point to vnet header */ vu_set_vnethdr(vdev, first, num_buffers, l2_hdrlen); - tcp_vu_prepare(c, conn, first, frame_size, &check); + tcp_vu_prepare(c, conn, first, num_buffers, &check); if (*c->pcap) { tcp_vu_update_check(tapside, first, num_buffers); pcap_iov(first, num_buffers, -- 2.47.0