From 39650211485ab55cb5431cd086acca3d07149576 Mon Sep 17 00:00:00 2001 From: Tim Froidcoeur Date: Wed, 13 Jan 2021 11:42:01 +0100 Subject: [PATCH] mptcp: correct chronos for meta and subflows The chronos that track the socket states "sendbuf limited", "busy" and "receive window limited" need to be handled properly by MPTCP. Specifically: * The send buffer is managed only by the meta, so when space becomes available in the meta send buffer, the meta and subflow "sendbuf limited" chronos must be stopped. * Similarly, the "sendbuf limited" chrono is started by the MPTCP scheduler, both for the meta and for the available subflow. * A "receive window limited" chrono is added to the meta window checks in the scheduler and in mptcp_write_xmit (this was missing). Fixes: 0bc2117826d3 ("Merge tag 'v4.10' into mptcp_trunk") Signed-off-by: Tim Froidcoeur Signed-off-by: Matthieu Baerts (cherry picked from commit 27bf186b677331f130effc20210a6f27882146be) Signed-off-by: Matthieu Baerts (cherry picked from commit 89fabbfa17c7327dd1dc7b2c9bfbffeeb6e964f3) Signed-off-by: Matthieu Baerts --- net/ipv4/tcp_output.c | 3 +++ net/mptcp/mptcp_input.c | 17 +++++++++++++++++ net/mptcp/mptcp_output.c | 10 +++++++++- net/mptcp/mptcp_sched.c | 22 ++++++++++++++++++++-- 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 684ed1364b39d..69737fa96e0cb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1682,8 +1682,11 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) * 2) not cwnd limited (this else condition) * 3) no more data to send (null tcp_send_head ) * 4) application is hitting buffer limit (SOCK_NOSPACE) + * 5) For MPTCP subflows, the scheduler determines + * sndbuf limited. 
*/ if (!tcp_send_head(sk) && sk->sk_socket && + !(mptcp(tcp_sk(sk)) && !is_meta_sk(sk)) && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); diff --git a/net/mptcp/mptcp_input.c b/net/mptcp/mptcp_input.c index a036bcd324eee..2bf66d1c4fffe 100644 --- a/net/mptcp/mptcp_input.c +++ b/net/mptcp/mptcp_input.c @@ -1420,6 +1420,16 @@ static void mptcp_snd_una_update(struct tcp_sock *meta_tp, u32 data_ack) meta_tp->snd_una = data_ack; } +static void mptcp_stop_subflow_chronos(struct sock *meta_sk, + const enum tcp_chrono type) +{ + const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + struct sock *sk_it; + + mptcp_for_each_sk(mpcb, sk_it) + tcp_chrono_stop(sk_it, type); +} + /* Handle the DATA_ACK */ static bool mptcp_process_data_ack(struct sock *sk, const struct sk_buff *skb) { @@ -1539,6 +1549,13 @@ static bool mptcp_process_data_ack(struct sock *sk, const struct sk_buff *skb) if (meta_sk->sk_socket && test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags)) meta_sk->sk_write_space(meta_sk); + + if (meta_sk->sk_socket && + !test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags)) { + tcp_chrono_stop(meta_sk, TCP_CHRONO_SNDBUF_LIMITED); + mptcp_stop_subflow_chronos(meta_sk, + TCP_CHRONO_SNDBUF_LIMITED); + } } if (meta_sk->sk_state != TCP_ESTABLISHED) { diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c index 3827067e39850..728c0a58356c5 100644 --- a/net/mptcp/mptcp_output.c +++ b/net/mptcp/mptcp_output.c @@ -664,6 +664,7 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp) { struct tcp_sock *meta_tp = tcp_sk(meta_sk), *subtp; + bool is_rwnd_limited = false; struct sock *subsk = NULL; struct mptcp_cb *mpcb = meta_tp->mpcb; struct sk_buff *skb; @@ -707,8 +708,10 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, if (skb_unclone(skb, GFP_ATOMIC)) break; - if 
(unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now))) + if (unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now))) { + is_rwnd_limited = true; break; + } /* Force tso_segs to 1 by using UINT_MAX. * We actually don't care about the exact number of segments @@ -782,6 +785,11 @@ bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, break; } + if (is_rwnd_limited) + tcp_chrono_start(meta_sk, TCP_CHRONO_RWND_LIMITED); + else + tcp_chrono_stop(meta_sk, TCP_CHRONO_RWND_LIMITED); + mptcp_for_each_sk(mpcb, subsk) { subtp = tcp_sk(subsk); diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c index 94244720c280a..3ab0c4dfa984d 100644 --- a/net/mptcp/mptcp_sched.c +++ b/net/mptcp/mptcp_sched.c @@ -371,14 +371,21 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject) if (!skb && meta_sk->sk_socket && test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { - struct sock *subsk = get_available_subflow(meta_sk, NULL, - false); + struct sock *subsk; + + /* meta is send buffer limited */ + tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED); + + subsk = get_available_subflow(meta_sk, NULL, false); if (!subsk) return NULL; skb = mptcp_rcv_buf_optimization(subsk, 0); if (skb) *reinject = -1; + else + tcp_chrono_start(subsk, + TCP_CHRONO_SNDBUF_LIMITED); } } return skb; @@ -410,6 +417,12 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, mss_now = tcp_current_mss(*subsk); if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) { + /* an active flow is selected, but segment will not be sent due + * to no more space in send window + * this means the meta is receive window limited + * the subflow might also be, if we have nothing to reinject + */ + tcp_chrono_start(meta_sk, TCP_CHRONO_RWND_LIMITED); skb = mptcp_rcv_buf_optimization(*subsk, 1); if (skb) *reinject = -1; @@ -417,6 +430,11 @@ static struct sk_buff 
*mptcp_next_segment(struct sock *meta_sk, return NULL; } + if (!*reinject) { + /* this will stop any other chronos on the meta */ + tcp_chrono_start(meta_sk, TCP_CHRONO_BUSY); + } + /* No splitting required, as we will only send one single segment */ if (skb->len <= mss_now) return skb;