Skip to content
This repository has been archived by the owner on Apr 18, 2024. It is now read-only.

Commit

Permalink
mptcp: Rework mptcp_disconnect
Browse files Browse the repository at this point in the history
Looking at how we close the subflows, this is very buggy. We are
"cheating" by marking them as non-MPTCP, forcing removal,... It is much
cleaner to reproduce the work from inet_child_forget and properly
disconnect the subflows.

Btw., the way we are doing it at the moment can lead to a deadlock,
because tcp_disconnect can be called while holding the master-socket's
lock (thus, we deadlock in mptcp_disconnect):

INFO: rcu_sched self-detected stall on CPU
	1-...: (20999 ticks this GP) idle=caa/140000000000001/0 softirq=230760/230765 fqs=0
	 (t=21000 jiffies g=98517 c=98516 q=79)
rcu_sched kthread starved for 21000 jiffies! g98517 c98516 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x402 ->cpu=0
rcu_sched       I20840     8      2 0x80000000
[...snip...]
RIP: 0010:rep_nop arch/x86/include/asm/processor.h:655 [inline]
RIP: 0010:cpu_relax arch/x86/include/asm/processor.h:660 [inline]
RIP: 0010:virt_spin_lock arch/x86/include/asm/qspinlock.h:87 [inline]
RIP: 0010:queued_spin_lock_slowpath+0x1d0/0xde0 kernel/locking/qspinlock.c:313
RSP: 0018:ffff88806d1064b8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff10
RAX: 0000000000000001 RBX: ffff888064dd6f88 RCX: 1ffff1100d8f5ca8
RDX: 0000000000000004 RSI: 0000000000000001 RDI: ffff888064dd6f88
RBP: ffffed100da20cb7 R08: ffff88806a028000 R09: 00000000d1095007
R10: 0000000048bafb09 R11: ffff88804858ff58 R12: 1ffff1100da20c9b
R13: 0000000000000003 R14: dffffc0000000000 R15: ffff88806d1065b8
 spin_lock include/linux/spinlock.h:317 [inline]
 mptcp_disconnect+0x2be/0x610 net/mptcp/mptcp_ctrl.c:1870
 tcp_disconnect+0xf27/0x1230 net/ipv4/tcp.c:2426
 inet_child_forget+0x70/0x310 net/ipv4/inet_connection_sock.c:910
 inet_csk_reqsk_queue_add+0x29f/0x320 net/ipv4/inet_connection_sock.c:939
 inet_csk_complete_hashdance+0xd2/0x1c0 net/ipv4/inet_connection_sock.c:962
 mptcp_check_req_master+0x245/0x2a0 net/mptcp/mptcp_ctrl.c:2071
 tcp_check_req+0x1383/0x17c0 net/ipv4/tcp_minisocks.c:817
 tcp_v6_rcv+0xf3c/0x3640 net/ipv6/tcp_ipv6.c:1561
 ip6_input_finish.constprop.10+0x2fd/0x1140 net/ipv6/ip6_input.c:284
 dst_input include/net/dst.h:465 [inline]
 ip6_rcv_finish+0x144/0x460 net/ipv6/ip6_input.c:71
 NF_HOOK include/linux/netfilter.h:365 [inline]
 ipv6_rcv+0xcaa/0x14a0 net/ipv6/ip6_input.c:208
 __netif_receive_skb_core+0xc85/0x2280 net/core/dev.c:4477
 __netif_receive_skb+0x27/0x1a0 net/core/dev.c:4515
 process_backlog+0x1f8/0x650 net/core/dev.c:5197
 napi_poll net/core/dev.c:5595 [inline]
 net_rx_action+0x68c/0x1520 net/core/dev.c:5661
 __do_softirq+0x1f5/0x6bf kernel/softirq.c:288
 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:996
 </IRQ>
 do_softirq.part.17+0x67/0x70 kernel/softirq.c:332
 do_softirq kernel/softirq.c:324 [inline]
 __local_bh_enable_ip+0x67/0x70 kernel/softirq.c:185
 local_bh_enable include/linux/bottom_half.h:32 [inline]
 rcu_read_unlock_bh include/linux/rcupdate.h:727 [inline]
 ip6_finish_output2+0x980/0x1880 net/ipv6/ip6_output.c:121
 ip6_finish_output net/ipv6/ip6_output.c:154 [inline]
 NF_HOOK_COND include/linux/netfilter.h:357 [inline]
 ip6_output+0x39a/0x6d0 net/ipv6/ip6_output.c:171
 dst_output include/net/dst.h:459 [inline]
 NF_HOOK include/linux/netfilter.h:365 [inline]
 ip6_xmit+0xc7c/0x17d0 net/ipv6/ip6_output.c:275
 inet6_csk_xmit+0x264/0x3f0 net/ipv6/inet6_connection_sock.c:139
 __tcp_transmit_skb+0x16c3/0x2f60 net/ipv4/tcp_output.c:1168
 __tcp_send_ack.part.29+0x328/0x590 net/ipv4/tcp_output.c:3715
 __tcp_send_ack net/ipv4/tcp_output.c:3721 [inline]
 tcp_send_ack+0x6f/0x90 net/ipv4/tcp_output.c:3721
 tcp_rcv_synsent_state_process+0x1c3e/0x2be0 net/ipv4/tcp_input.c:5929
 tcp_rcv_state_process+0x7a4/0x259f net/ipv4/tcp_input.c:6073
 tcp_v6_do_rcv+0x7a4/0x12f0 net/ipv6/tcp_ipv6.c:1381
 sk_backlog_rcv include/net/sock.h:907 [inline]
 __release_sock+0x187/0x2e0 net/core/sock.c:2289
 release_sock+0x99/0x250 net/core/sock.c:2804
 inet_wait_for_connect net/ipv4/af_inet.c:560 [inline]
 __inet_stream_connect+0x60e/0xe30 net/ipv4/af_inet.c:646
 inet_stream_connect+0x53/0xa0 net/ipv4/af_inet.c:685
 SYSC_connect+0x1d3/0x3f0 net/socket.c:1654
 do_syscall_64+0x22e/0x620 arch/x86/entry/common.c:289
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2
RIP: 0033:0x7f4e9e587469
RSP: 002b:00007f4e9ec77dc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a
RAX: ffffffffffffffda RBX: 000000000065bf00 RCX: 00007f4e9e587469
RDX: 000000000000001c RSI: 00000000200000c0 RDI: 0000000000000007
RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000040fa84
R13: 0000000000420208 R14: 00007f4e9ec785c0 R15: 00000000ffffffff

The panic is only triggerable since the introduction of
inet_child_forget() with commit ebb516a ("tcp/dccp: fix race at
listener dismantle phase").
Thus, marking it as a fix to the merge with v4.4.

Fixes: b568f57 ("Merge tag 'v4.4' into mptcp_trunk")
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
  • Loading branch information
cpaasch authored and matttbe committed Mar 8, 2019
1 parent 04a77b0 commit 78bab8d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 37 deletions.
4 changes: 2 additions & 2 deletions include/net/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,7 @@ unsigned int mptcp_xmit_size_goal(const struct sock *meta_sk, u32 mss_now,
int mptcp_init_tw_sock(struct sock *sk, struct tcp_timewait_sock *tw);
void mptcp_twsk_destructor(struct tcp_timewait_sock *tw);
void mptcp_time_wait(struct sock *sk, int state, int timeo);
void mptcp_disconnect(struct sock *sk);
void mptcp_disconnect(struct sock *meta_sk);
bool mptcp_should_expand_sndbuf(const struct sock *sk);
int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb);
void mptcp_tsq_flags(struct sock *sk);
Expand Down Expand Up @@ -1458,7 +1458,7 @@ static inline int mptcp_init_tw_sock(struct sock *sk,
return 0;
}
static inline void mptcp_twsk_destructor(struct tcp_timewait_sock *tw) {}
static inline void mptcp_disconnect(struct sock *sk) {}
static inline void mptcp_disconnect(struct sock *meta_sk) {}
static inline void mptcp_tsq_flags(struct sock *sk) {}
static inline void mptcp_tsq_sub_deferred(struct sock *meta_sk) {}
static inline void mptcp_hash_remove_bh(struct tcp_sock *meta_tp) {}
Expand Down
50 changes: 15 additions & 35 deletions net/mptcp/mptcp_ctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1619,13 +1619,6 @@ static void mptcp_sub_close_doit(struct sock *sk)
if (sock_flag(sk, SOCK_DEAD))
return;

/* We come from tcp_disconnect. We are sure that meta_sk is set */
if (!mptcp(tp)) {
tp->closing = 1;
tcp_close(sk, 0);
return;
}

if (meta_sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) {
tp->closing = 1;
tcp_close(sk, 0);
Expand Down Expand Up @@ -1933,47 +1926,34 @@ void mptcp_close(struct sock *meta_sk, long timeout)
sock_put(meta_sk); /* Taken by sock_hold */
}

void mptcp_disconnect(struct sock *sk)
void mptcp_disconnect(struct sock *meta_sk)
{
struct tcp_sock *meta_tp = tcp_sk(meta_sk);
struct mptcp_tcp_sock *mptcp;
struct tcp_sock *tp = tcp_sk(sk);
struct hlist_node *tmp;

__skb_queue_purge(&tp->mpcb->reinject_queue);
__skb_queue_purge(&meta_tp->mpcb->reinject_queue);

if (tp->inside_tk_table)
mptcp_hash_remove_bh(tp);
if (meta_tp->inside_tk_table)
mptcp_hash_remove_bh(meta_tp);

local_bh_disable();
mptcp_for_each_sub_safe(tp->mpcb, mptcp, tmp) {
mptcp_for_each_sub_safe(meta_tp->mpcb, mptcp, tmp) {
struct sock *subsk = mptcp_to_sock(mptcp);

/* The socket will get removed from the subsocket-list
* and made non-mptcp by setting mpc to 0.
*
* This is necessary, because tcp_disconnect assumes
* that the connection is completly dead afterwards.
* Thus we need to do a mptcp_del_sock. Due to this call
* we have to make it non-mptcp.
*
* We have to lock the socket, because we set mpc to 0.
* An incoming packet would take the subsocket's lock
* and go on into the receive-path.
* This would be a race.
*/
meta_sk->sk_prot->disconnect(subsk, O_NONBLOCK);

sock_orphan(subsk);

percpu_counter_inc(meta_sk->sk_prot->orphan_count);

bh_lock_sock(subsk);
mptcp_del_sock(subsk);
tcp_sk(subsk)->mpc = 0;
tcp_sk(subsk)->ops = &tcp_specific;
mptcp_sub_force_close(subsk);
bh_unlock_sock(subsk);
inet_csk_destroy_sock(subsk);
}
local_bh_enable();

tp->was_meta_sk = 1;
tp->mpc = 0;
tp->ops = &tcp_specific;
meta_tp->was_meta_sk = 1;
meta_tp->mpc = 0;
meta_tp->ops = &tcp_specific;
}


Expand Down

0 comments on commit 78bab8d

Please sign in to comment.