From: Stefano Brivio
Signed-off-by: Stefano Brivio
Signed-off-by: David Gibson
---
flow.c | 36 ++++++++++++++++++++++++++++++++++++
flow.h | 2 ++
migrate.c | 9 +++++++++
tcp.c | 30 ++++++++++++++++++++++++++++++
tcp_conn.h | 1 +
5 files changed, 78 insertions(+)
diff --git a/flow.c b/flow.c
index dbe57082..4807653a 100644
--- a/flow.c
+++ b/flow.c
@@ -956,6 +956,42 @@ static int flow_migrate_repair_all(struct ctx *c, bool enable)
return 0;
}
+/**
+ * flow_migrate_source_early() - Early tasks: shrink (RFC 7323 2.2) TCP windows
+ * @c: Execution context
+ * @stage: Migration stage information, unused
+ * @fd: Migration file descriptor, unused
+ *
+ * Return: 0 on success, positive error code on failure
+ */
+int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
+ int fd)
+{
+ union flow *flow;
+ unsigned i;
+ int rc;
+
+ (void)stage;
+ (void)fd;
+
+ /* We need repair mode to dump and set (some) window parameters */
+ if ((rc = flow_migrate_repair_all(c, true)))
+ return -rc; /* presumably rc < 0 here, giving the positive code promised above */
+ /* NOTE(review): rollback below gets i as bound; confirm later flows need none */
+ foreach_tcp_flow(i, flow, FLOW_MAX) {
+ if ((rc = tcp_flow_migrate_shrink_window(i, &flow->tcp))) {
+ err("Shrinking window, flow %u: %s", i, strerror_(-rc));
+ return flow_migrate_source_rollback(c, i, -rc);
+ }
+ }
+
+ /* Now send window updates. We'll flip repair mode back on in a bit */
+ if ((rc = flow_migrate_repair_all(c, false)))
+ return -rc; /* NOTE(review): no rollback on this path -- confirm intended */
+
+ return 0;
+}
+
/**
* flow_migrate_source_pre() - Prepare flows for migration: enable repair mode
* @c: Execution context
diff --git a/flow.h b/flow.h
index a485c359..675726eb 100644
--- a/flow.h
+++ b/flow.h
@@ -249,6 +249,8 @@ union flow;
void flow_init(void);
void flow_defer_handler(const struct ctx *c, const struct timespec *now);
+int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
+ int fd);
int flow_migrate_source_pre(struct ctx *c, const struct migrate_stage *stage,
int fd);
int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
diff --git a/migrate.c b/migrate.c
index d035deda..b4e53b45 100644
--- a/migrate.c
+++ b/migrate.c
@@ -98,6 +98,15 @@ static int seen_addrs_target_v1(struct ctx *c,
/* Stages for version 1 */
static const struct migrate_stage stages_v1[] = {
+ /* FIXME: With this step, close() in tcp_flow_migrate_source_ext()
+ * *sometimes* closes the connection for real.
+ */
+/* {
+ .name = "shrink TCP windows",
+ .source = flow_migrate_source_early,
+ .target = NULL,
+ },
+*/
{
.name = "observed addresses",
.source = seen_addrs_source_v1,
diff --git a/tcp.c b/tcp.c
index c3db1696..94bb059a 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3061,6 +3061,36 @@ static int tcp_flow_repair_wnd(int s, uint32_t *snd_wl1, uint32_t *snd_wnd,
return 0;
}
+/**
+ * tcp_flow_migrate_shrink_window() - Dump window data, decrease socket window
+ * @fidx:	Flow index
+ * @conn:	Pointer to the TCP connection structure
+ *
+ * Return: 0 on success, negative error code on failure to dump window data
+ */
+int tcp_flow_migrate_shrink_window(int fidx, const struct tcp_tap_conn *conn)
+{
+	struct tcp_tap_transfer_ext *t = &migrate_ext[fidx];
+	struct tcp_repair_window wnd = { 0 };
+	int s = conn->sock, rc;
+
+	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &((int){ 0 }), sizeof(int)))
+		debug("TCP: failed to set SO_RCVBUF to minimum value");
+
+	/* Dump window data as it is for the target, before touching stuff */
+	rc = tcp_flow_repair_wnd(s, &t->sock_snd_wl1, &t->sock_snd_wnd,
+				 &t->sock_max_window, &t->sock_rcv_wnd,
+				 &t->sock_rcv_wup, &wnd);
+	if (rc)	/* Don't write back window data we couldn't read */
+		return rc;
+
+	wnd.rcv_wnd = 0;	/* Shrink: zero receive window */
+	if (setsockopt(s, IPPROTO_TCP, TCP_REPAIR_WINDOW, &wnd, sizeof(wnd)))
+		debug_perror("Setting window repair data, socket %i", s);
+
+	return 0;
+}
+
/**
* tcp_flow_migrate_source() - Send data (flow table part) for a single flow
* @c: Execution context
diff --git a/tcp_conn.h b/tcp_conn.h
index 1c920a6e..b64e857a 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -227,6 +227,7 @@ bool tcp_flow_defer(const struct tcp_tap_conn *conn);
int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn);
int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn);
+int tcp_flow_migrate_shrink_window(int fidx, const struct tcp_tap_conn *conn);
int tcp_flow_migrate_source(int fd, const struct tcp_tap_conn *conn);
int tcp_flow_migrate_source_ext(int fd, int fidx,
const struct tcp_tap_conn *conn);
--
2.48.1