From 29734bdd903cd7a21d0c09f53da4bb29e6db72ab Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 6 Aug 2024 15:41:31 +0800 Subject: [PATCH] sched: make printk safe when rq lock is held A deadlock can happen if we call printk(), for example through SCHED_WARN_ON(), while rq->__lock is held. printk() will try to print the message to the console, and the console driver can call queue_work_on(), which will try to acquire rq->__lock again. This means that any WARN in a kernel function that holds rq->__lock, such as schedule(), sched_ttwu_pending(), etc., can cause a deadlock. The following is the call trace of the deadlock case that I encountered: PID: 0 TASK: ff36bfda010c8000 CPU: 156 COMMAND: "swapper/156" #0 crash_nmi_callback+30 #1 nmi_handle+85 #2 default_do_nmi+66 #3 exc_nmi+291 #4 end_repeat_nmi+22 [exception RIP: native_queued_spin_lock_slowpath+96] #5 native_queued_spin_lock_slowpath+96 #6 _raw_spin_lock+30 #7 ttwu_queue+111 #8 try_to_wake_up+375 #9 __queue_work+462 #10 queue_work_on+32 #11 soft_cursor+420 #12 bit_cursor+898 #13 hide_cursor+39 #14 vt_console_print+995 #15 call_console_drivers.constprop.0+204 #16 console_unlock+374 #17 vprintk_emit+280 #18 printk+88 #19 __warn_printk+71 #20 enqueue_task_fair+1779 #21 activate_task+102 #22 ttwu_do_activate+155 #23 sched_ttwu_pending+177 #24 flush_smp_call_function_from_idle+42 #25 do_idle+161 #26 cpu_startup_entry+25 #27 secondary_startup_64_no_verify+194 Fix this by using __printk_safe_enter()/__printk_safe_exit() in rq_pin_lock()/rq_unpin_lock(). Then, printk will defer printing the buffers to the console. 
Signed-off-by: Menglong Dong Signed-off-by: Bin Lai --- kernel/sched/sched.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c74ef008f3ae27..a8ffecbe3a09fb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1702,6 +1702,7 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) { rf->cookie = lockdep_pin_lock(__rq_lockp(rq)); + __printk_safe_enter(); #ifdef CONFIG_SCHED_DEBUG rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); rf->clock_update_flags = 0; @@ -1718,6 +1719,7 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) rf->clock_update_flags = RQCF_UPDATED; #endif + __printk_safe_exit(); lockdep_unpin_lock(__rq_lockp(rq), rf->cookie); }