From e73e81975f2447e6f556100cada64a18ec631cbb Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Tue, 31 Jul 2018 21:12:22 +0900 Subject: [PATCH] sched/debug: Fix potential deadlock when writing to sched_features The following lockdep report can be triggered by writing to /sys/kernel/debug/sched_features: ====================================================== WARNING: possible circular locking dependency detected 4.18.0-rc6-00152-gcd3f77d74ac3-dirty #18 Not tainted ------------------------------------------------------ sh/3358 is trying to acquire lock: 000000004ad3989d (cpu_hotplug_lock.rw_sem){++++}, at: static_key_enable+0x14/0x30 but task is already holding lock: 00000000c1b31a88 (&sb->s_type->i_mutex_key#3){+.+.}, at: sched_feat_write+0x160/0x428 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&sb->s_type->i_mutex_key#3){+.+.}: lock_acquire+0xb8/0x148 down_write+0xac/0x140 start_creating+0x5c/0x168 debugfs_create_dir+0x18/0x220 opp_debug_register+0x8c/0x120 _add_opp_dev+0x104/0x1f8 dev_pm_opp_get_opp_table+0x174/0x340 _of_add_opp_table_v2+0x110/0x760 dev_pm_opp_of_add_table+0x5c/0x240 dev_pm_opp_of_cpumask_add_table+0x5c/0x100 cpufreq_init+0x160/0x430 cpufreq_online+0x1cc/0xe30 cpufreq_add_dev+0x78/0x198 subsys_interface_register+0x168/0x270 cpufreq_register_driver+0x1c8/0x278 dt_cpufreq_probe+0xdc/0x1b8 platform_drv_probe+0xb4/0x168 driver_probe_device+0x318/0x4b0 __device_attach_driver+0xfc/0x1f0 bus_for_each_drv+0xf8/0x180 __device_attach+0x164/0x200 device_initial_probe+0x10/0x18 bus_probe_device+0x110/0x178 device_add+0x6d8/0x908 platform_device_add+0x138/0x3d8 platform_device_register_full+0x1cc/0x1f8 cpufreq_dt_platdev_init+0x174/0x1bc do_one_initcall+0xb8/0x310 kernel_init_freeable+0x4b8/0x56c kernel_init+0x10/0x138 ret_from_fork+0x10/0x18 -> #2 (opp_table_lock){+.+.}: lock_acquire+0xb8/0x148 __mutex_lock+0x104/0xf50 mutex_lock_nested+0x1c/0x28 _of_add_opp_table_v2+0xb4/0x760 dev_pm_opp_of_add_table+0x5c/0x240 dev_pm_opp_of_cpumask_add_table+0x5c/0x100 cpufreq_init+0x160/0x430 cpufreq_online+0x1cc/0xe30 cpufreq_add_dev+0x78/0x198 subsys_interface_register+0x168/0x270 cpufreq_register_driver+0x1c8/0x278 dt_cpufreq_probe+0xdc/0x1b8 platform_drv_probe+0xb4/0x168 driver_probe_device+0x318/0x4b0 __device_attach_driver+0xfc/0x1f0 bus_for_each_drv+0xf8/0x180 __device_attach+0x164/0x200 device_initial_probe+0x10/0x18 bus_probe_device+0x110/0x178 device_add+0x6d8/0x908 platform_device_add+0x138/0x3d8 platform_device_register_full+0x1cc/0x1f8 cpufreq_dt_platdev_init+0x174/0x1bc do_one_initcall+0xb8/0x310 kernel_init_freeable+0x4b8/0x56c kernel_init+0x10/0x138 ret_from_fork+0x10/0x18 -> #1 (subsys mutex#6){+.+.}: lock_acquire+0xb8/0x148 __mutex_lock+0x104/0xf50 mutex_lock_nested+0x1c/0x28 subsys_interface_register+0xd8/0x270 cpufreq_register_driver+0x1c8/0x278 dt_cpufreq_probe+0xdc/0x1b8 platform_drv_probe+0xb4/0x168 driver_probe_device+0x318/0x4b0 __device_attach_driver+0xfc/0x1f0 bus_for_each_drv+0xf8/0x180 __device_attach+0x164/0x200 device_initial_probe+0x10/0x18 bus_probe_device+0x110/0x178 device_add+0x6d8/0x908 platform_device_add+0x138/0x3d8 platform_device_register_full+0x1cc/0x1f8 cpufreq_dt_platdev_init+0x174/0x1bc do_one_initcall+0xb8/0x310 kernel_init_freeable+0x4b8/0x56c kernel_init+0x10/0x138 ret_from_fork+0x10/0x18 -> #0 (cpu_hotplug_lock.rw_sem){++++}: __lock_acquire+0x203c/0x21d0 lock_acquire+0xb8/0x148 cpus_read_lock+0x58/0x1c8 static_key_enable+0x14/0x30 sched_feat_write+0x314/0x428 full_proxy_write+0xa0/0x138 __vfs_write+0xd8/0x388 vfs_write+0xdc/0x318 ksys_write+0xb4/0x138 sys_write+0xc/0x18 __sys_trace_return+0x0/0x4 other info that might help us debug this: Chain exists of: cpu_hotplug_lock.rw_sem --> opp_table_lock --> &sb->s_type->i_mutex_key#3 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sb->s_type->i_mutex_key#3); lock(opp_table_lock); lock(&sb->s_type->i_mutex_key#3); lock(cpu_hotplug_lock.rw_sem); *** DEADLOCK *** 2 locks held by sh/3358: #0: 00000000a8c4b363 (sb_writers#10){.+.+}, at: vfs_write+0x238/0x318 #1: 00000000c1b31a88 (&sb->s_type->i_mutex_key#3){+.+.}, at: sched_feat_write+0x160/0x428 stack backtrace: CPU: 5 PID: 3358 Comm: sh Not tainted 4.18.0-rc6-00152-gcd3f77d74ac3-dirty #18 Hardware name: Renesas H3ULCB Kingfisher board based on r8a7795 ES2.0+ (DT) Call trace: dump_backtrace+0x0/0x288 show_stack+0x14/0x20 dump_stack+0x13c/0x1ac print_circular_bug.isra.10+0x270/0x438 check_prev_add.constprop.16+0x4dc/0xb98 __lock_acquire+0x203c/0x21d0 lock_acquire+0xb8/0x148 cpus_read_lock+0x58/0x1c8 static_key_enable+0x14/0x30 sched_feat_write+0x314/0x428 full_proxy_write+0xa0/0x138 __vfs_write+0xd8/0x388 vfs_write+0xdc/0x318 ksys_write+0xb4/0x138 sys_write+0xc/0x18 __sys_trace_return+0x0/0x4 This is because when loading the cpufreq_dt module we first acquire cpu_hotplug_lock.rw_sem lock, then in cpufreq_init(), we are taking the &sb->s_type->i_mutex_key lock. But when writing to /sys/kernel/debug/sched_features, the cpu_hotplug_lock.rw_sem lock depends on the &sb->s_type->i_mutex_key lock. To fix this bug, reverse the lock acquisition order when writing to sched_features, this way cpu_hotplug_lock.rw_sem no longer depends on &sb->s_type->i_mutex_key. Tested-by: Dietmar Eggemann Signed-off-by: Jiada Wang Signed-off-by: Peter Zijlstra (Intel) Cc: Eugeniu Rosca Cc: George G. Davis Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180731121222.26195-1-jiada_wang@mentor.com Signed-off-by: Ingo Molnar --- kernel/sched/debug.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 60caf1fb94e057..6383aa6a60ca02 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -89,12 +89,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - static_key_disable(&sched_feat_keys[i]); + static_key_disable_cpuslocked(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - static_key_enable(&sched_feat_keys[i]); + static_key_enable_cpuslocked(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; @@ -146,9 +146,11 @@ sched_feat_write(struct file *filp, const char __user *ubuf, /* Ensure the static_key remains in a consistent state */ inode = file_inode(filp); + cpus_read_lock(); inode_lock(inode); ret = sched_feat_set(cmp); inode_unlock(inode); + cpus_read_unlock(); if (ret < 0) return ret;