OSDN Git Service

rcu: Allow expedited RCU CPU stall warnings to dump task stacks
author: Paul E. McKenney <paulmck@kernel.org>
Tue, 20 Dec 2022 02:02:20 +0000 (18:02 -0800)
committer: Paul E. McKenney <paulmck@kernel.org>
Wed, 4 Jan 2023 01:47:44 +0000 (17:47 -0800)
This commit introduces the rcupdate.rcu_exp_stall_task_details kernel
boot parameter, which causes expedited RCU CPU stall warnings to dump
the stacks of any tasks blocking the current expedited grace period.

Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Documentation/admin-guide/kernel-parameters.txt
kernel/rcu/rcu.h
kernel/rcu/tree_exp.h
kernel/rcu/update.c

index 6cfa6e3..aa453f9 100644 (file)
                        rcupdate.rcu_cpu_stall_timeout to be used (after
                        conversion from seconds to milliseconds).
 
+       rcupdate.rcu_exp_stall_task_details= [KNL]
+                       Print stack dumps of any tasks blocking the
+                       current expedited RCU grace period during an
+                       expedited RCU CPU stall warning.
+
        rcupdate.rcu_expedited= [KNL]
                        Use expedited grace-period primitives, for
                        example, synchronize_rcu_expedited() instead
index c5aa934..fa640c4 100644 (file)
@@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump;
 extern int rcu_cpu_stall_suppress;
 extern int rcu_cpu_stall_timeout;
 extern int rcu_exp_cpu_stall_timeout;
+extern bool rcu_exp_stall_task_details __read_mostly;
 int rcu_jiffies_till_stall_check(void);
 int rcu_exp_jiffies_till_stall_check(void);
 
index 927abaf..249c296 100644 (file)
@@ -11,6 +11,7 @@
 
 static void rcu_exp_handler(void *unused);
 static int rcu_print_task_exp_stall(struct rcu_node *rnp);
+static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp);
 
 /*
  * Record the start of an expedited grace period.
@@ -671,6 +672,7 @@ static void synchronize_rcu_expedited_wait(void)
                                dump_cpu_task(cpu);
                                preempt_enable();
                        }
+                       rcu_exp_print_detail_task_stall_rnp(rnp);
                }
                jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
                panic_on_rcu_stall();
@@ -813,6 +815,36 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
        return ndetected;
 }
 
+/*
+ * If rcu_exp_stall_task_details is set, dump the stack of each task on
+ * rnp->blkd_tasks from rnp->exp_tasks onward, these being the tasks blocking
+ * the current expedited grace period.  The scan holds rnp's lock throughout.
+ */
+static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+       unsigned long flags;
+       struct task_struct *t;
+
+       if (!rcu_exp_stall_task_details)
+               return;
+       raw_spin_lock_irqsave_rcu_node(rnp, flags);
+       if (!READ_ONCE(rnp->exp_tasks)) {
+               raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+               return;
+       }
+       t = list_entry(rnp->exp_tasks->prev,
+                      struct task_struct, rcu_node_entry);
+       list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+               /*
+                * We could be printing a lot while holding a spinlock.
+                * Avoid triggering hard lockup.
+                */
+               touch_nmi_watchdog();
+               sched_show_task(t);
+       }
+       raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+}
+
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
 /* Request an expedited quiescent state. */
@@ -885,6 +917,15 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
        return 0;
 }
 
+/*
+ * Because preemptible RCU does not exist, there are never any tasks
+ * blocked within RCU read-side critical sections that could be blocking
+ * the current expedited grace period, so this variant is an empty stub.
+ */
+static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+}
+
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
 /**
index 587b97c..6ed5020 100644 (file)
@@ -509,6 +509,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
 module_param(rcu_cpu_stall_timeout, int, 0644);
 int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
 module_param(rcu_exp_cpu_stall_timeout, int, 0644);
+bool rcu_exp_stall_task_details __read_mostly;
+module_param(rcu_exp_stall_task_details, bool, 0644);
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
 
 // Suppress boot-time RCU CPU stall warnings and rcutorture writer stall