OSDN Git Service

stop_machine: Add function and caller debug info
author Peter Zijlstra <peterz@infradead.org>
Mon, 21 Sep 2020 10:58:17 +0000 (12:58 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 10 Nov 2020 17:38:57 +0000 (18:38 +0100)
Crashes in stop-machine are hard to connect to the calling code, add a
little something to help with that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Link: https://lkml.kernel.org/r/20201023102346.116513635@infradead.org
include/linux/stop_machine.h
kernel/sched/core.c
kernel/stop_machine.c
lib/dump_stack.c

index 76d8b09..30577c3 100644 (file)
@@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
 struct cpu_stop_work {
        struct list_head        list;           /* cpu_stopper->works */
        cpu_stop_fn_t           fn;
+       unsigned long           caller;         /* _RET_IP_ of the queuing function, for debug output */
        void                    *arg;
        struct cpu_stop_done    *done;
 };
@@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
 void stop_machine_unpark(int cpu);
 void stop_machine_yield(const struct cpumask *cpumask);
 
+extern void print_stop_info(const char *log_lvl, struct task_struct *task);
+
 #else  /* CONFIG_SMP */
 
 #include <linux/workqueue.h>
@@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
        return false;
 }
 
+static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { } /* !CONFIG_SMP: no-op */
+
 #endif /* CONFIG_SMP */
 
 /*
index d2003a7..5e24104 100644 (file)
@@ -6447,6 +6447,7 @@ void sched_show_task(struct task_struct *p)
                (unsigned long)task_thread_info(p)->flags);
 
        print_worker_info(KERN_INFO, p);
+       print_stop_info(KERN_INFO, p);
        show_stack(p, NULL, KERN_INFO);
        put_task_stack(p);
 }
index 865bb02..3cf567c 100644 (file)
@@ -42,11 +42,27 @@ struct cpu_stopper {
        struct list_head        works;          /* list of pending works */
 
        struct cpu_stop_work    stop_work;      /* for stop_cpus */
+       unsigned long           caller;
+       cpu_stop_fn_t           fn;
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;
 
+void print_stop_info(const char *log_lvl, struct task_struct *task)
+{
+       /*
+        * If @task is a stopper task, it cannot migrate and task_cpu() is
+        * stable.
+        */
+       struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task));
+
+       if (task != stopper->thread)
+               return;         /* @task is not its CPU's stopper thread: nothing to print */
+
+       printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller);
+}
+
 /* static data for stop_cpus */
 static DEFINE_MUTEX(stop_cpus_mutex);
 static bool stop_cpus_in_progress;
@@ -123,7 +139,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
 {
        struct cpu_stop_done done;
-       struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
+       struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ };
 
        cpu_stop_init_done(&done, 1);
        if (!cpu_stop_queue_work(cpu, &work))
@@ -331,7 +347,8 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
        work1 = work2 = (struct cpu_stop_work){
                .fn = multi_cpu_stop,
                .arg = &msdata,
-               .done = &done
+               .done = &done,
+               .caller = _RET_IP_,
        };
 
        cpu_stop_init_done(&done, 2);
@@ -367,7 +384,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                        struct cpu_stop_work *work_buf)
 {
-       *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
+       *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, };
        return cpu_stop_queue_work(cpu, work_buf);
 }
 
@@ -487,6 +504,8 @@ repeat:
                int ret;
 
                /* cpu stop callbacks must not sleep, make in_atomic() == T */
+               stopper->caller = work->caller;
+               stopper->fn = fn;
                preempt_count_inc();
                ret = fn(arg);
                if (done) {
@@ -495,6 +514,8 @@ repeat:
                        cpu_stop_signal_done(done);
                }
                preempt_count_dec();
+               stopper->fn = NULL;
+               stopper->caller = 0;
                WARN_ONCE(preempt_count(),
                          "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
                goto repeat;
index a00ee6e..f5a33b6 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/atomic.h>
 #include <linux/kexec.h>
 #include <linux/utsname.h>
+#include <linux/stop_machine.h>
 
 static char dump_stack_arch_desc_str[128];
 
@@ -57,6 +58,7 @@ void dump_stack_print_info(const char *log_lvl)
                       log_lvl, dump_stack_arch_desc_str);
 
        print_worker_info(log_lvl, current);
+       print_stop_info(log_lvl, current);
 }
 
 /**