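Clamp the TCP retransmission timeout (RTO) to the maximum read from the
tcp_rto_max_ms sysctl, as the Linux kernel does. Also, if the RTO is
less than 3 seconds once the connection is established, re-initialize it
to 3 seconds for data retransmissions, according to RFC 6298.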
Suggested-by: Stefano Brivio
Signed-off-by: Yumei Huang
---
tcp.c | 25 ++++++++++++++++++++-----
tcp.h | 2 ++
2 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/tcp.c b/tcp.c
index 96ee56a..84a6700 100644
--- a/tcp.c
+++ b/tcp.c
@@ -187,6 +187,9 @@
* for established connections, or (tcp_syn_retries +
* tcp_syn_linear_timeouts) times during the handshake, reset the connection
*
+ * - RTO_INIT_ACK: if the RTO is less than this on an established connection,
+ * re-initialize it to this value for data retransmissions.
+ *
* - FIN_TIMEOUT: if a FIN segment was sent to tap/guest (flag ACK_FROM_TAP_DUE
* with TAP_FIN_SENT event), and no ACK is received within this time, reset
* the connection
@@ -340,6 +343,7 @@ enum {
#define ACK_INTERVAL 10 /* ms */
#define RTO_INIT 1 /* s, RFC 6298 */
+#define RTO_INIT_ACK 3 /* s, RFC 6298 */
#define FIN_TIMEOUT 60
#define ACT_TIMEOUT 7200
@@ -365,9 +369,11 @@ uint8_t tcp_migrate_rcv_queue [TCP_MIGRATE_RCV_QUEUE_MAX];
#define TCP_SYN_RETRIES "/proc/sys/net/ipv4/tcp_syn_retries"
#define TCP_SYN_LINEAR_TIMEOUTS "/proc/sys/net/ipv4/tcp_syn_linear_timeouts"
+#define TCP_RTO_MAX_MS "/proc/sys/net/ipv4/tcp_rto_max_ms"
#define TCP_SYN_RETRIES_DEFAULT 6
#define TCP_SYN_LINEAR_TIMEOUTS_DEFAULT 4
+#define TCP_RTO_MAX_MS_DEFAULT 120000
/* "Extended" data (not stored in the flow table) for TCP flow migration */
static struct tcp_tap_transfer_ext migrate_ext[FLOW_MAX];
@@ -585,10 +591,13 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
if (conn->flags & ACK_TO_TAP_DUE) {
it.it_value.tv_nsec = (long)ACK_INTERVAL * 1000 * 1000;
} else if (conn->flags & ACK_FROM_TAP_DUE) {
- int exp = conn->retries;
+ int exp = conn->retries, timeout = RTO_INIT;
if (!(conn->events & ESTABLISHED))
exp -= c->tcp.syn_linear_timeouts;
- it.it_value.tv_sec = RTO_INIT << MAX(exp, 0);
+ else
+ timeout = MAX(timeout, RTO_INIT_ACK);
+ timeout <<= MAX(exp, 0);
+ it.it_value.tv_sec = MIN(timeout, c->tcp.tcp_rto_max);
} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
it.it_value.tv_sec = FIN_TIMEOUT;
} else {
@@ -2785,18 +2794,24 @@ static socklen_t tcp_probe_tcp_info(void)
*/
void tcp_get_rto_params(struct ctx *c)
{
- intmax_t tcp_syn_retries, syn_linear_timeouts;
+ intmax_t tcp_syn_retries, syn_linear_timeouts, tcp_rto_max_ms;
tcp_syn_retries = read_file_integer(
TCP_SYN_RETRIES, TCP_SYN_RETRIES_DEFAULT);
syn_linear_timeouts = read_file_integer(
TCP_SYN_LINEAR_TIMEOUTS, TCP_SYN_LINEAR_TIMEOUTS_DEFAULT);
+ tcp_rto_max_ms = read_file_integer(
+ TCP_RTO_MAX_MS, TCP_RTO_MAX_MS_DEFAULT);
c->tcp.tcp_syn_retries = MIN(tcp_syn_retries, UINT8_MAX);
c->tcp.syn_linear_timeouts = MIN(syn_linear_timeouts, UINT8_MAX);
+ c->tcp.tcp_rto_max = MIN(
+ DIV_ROUND_CLOSEST(tcp_rto_max_ms, 1000), SIZE_MAX);
- debug("Read sysctl values tcp_syn_retries: %"PRIu8", linear_timeouts: %"PRIu8,
- c->tcp.tcp_syn_retries, c->tcp.syn_linear_timeouts);
+ debug("Read sysctl values tcp_syn_retries: %"PRIu8
+ ", linear_timeouts: %"PRIu8", tcp_rto_max: %zu",
+ c->tcp.tcp_syn_retries, c->tcp.syn_linear_timeouts,
+ c->tcp.tcp_rto_max);
}
/**
diff --git a/tcp.h b/tcp.h
index befedde..a238bb7 100644
--- a/tcp.h
+++ b/tcp.h
@@ -59,6 +59,7 @@ union tcp_listen_epoll_ref {
* @fwd_out: Port forwarding configuration for outbound packets
* @timer_run: Timestamp of most recent timer run
* @pipe_size: Size of pipes for spliced connections
+ * @tcp_rto_max: Maximum retransmission timeout (in s)
* @tcp_syn_retries: SYN retries using exponential backoff timeout
* @syn_linear_timeouts: SYN retries before using exponential backoff timeout
*/
@@ -67,6 +68,7 @@ struct tcp_ctx {
struct fwd_ports fwd_out;
struct timespec timer_run;
size_t pipe_size;
+ size_t tcp_rto_max;
uint8_t tcp_syn_retries;
uint8_t syn_linear_timeouts;
};
--
2.49.0