watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 7 Jun 2018 08:52:03 +0000 (10:52 +0200)
Commit:     Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 3 Jul 2018 07:20:43 +0000 (09:20 +0200)

Oleg suggested replacing the "watchdog/%u" threads with cpu_stop_work.
That removes one thread per CPU while at the same time fixing softlockup
vs SCHED_DEADLINE: the per-CPU SCHED_FIFO watchdog threads could be
starved by SCHED_DEADLINE tasks, whereas the stopper task that executes
cpu_stop_work preempts even DEADLINE tasks.
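
[Editorial note: the mechanism in outline, as a sketch with illustrative
names rather than code from this patch. The per-CPU hrtimer callback no
longer wakes a dedicated kthread; it queues a one-shot cpu_stop_work on
the local CPU's stopper task ("migration/N"), which preempts every other
task on that CPU, SCHED_DEADLINE included.]

    /* Editorial sketch, not part of the patch; names are illustrative. */
    #include <linux/hrtimer.h>
    #include <linux/percpu.h>
    #include <linux/smp.h>
    #include <linux/stop_machine.h>

    static DEFINE_PER_CPU(struct cpu_stop_work, demo_stop_work);

    static int demo_touch_fn(void *data)
    {
            /* Runs on the per-CPU stopper task, which preempts every
             * other task on this CPU, including DEADLINE tasks. */
            return 0;
    }

    static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
    {
            /* Called in hardirq context: just queue the work and rearm. */
            stop_one_cpu_nowait(smp_processor_id(), demo_touch_fn, NULL,
                                this_cpu_ptr(&demo_stop_work));
            hrtimer_forward_now(t, ms_to_ktime(4000));
            return HRTIMER_RESTART;
    }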

But more importantly, it does away with the single
smpboot_update_cpumask_percpu_thread() user, which allows
cleanups/shrinkage of the smpboot interface.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/cpuhotplug.h
include/linux/nmi.h
kernel/cpu.c
kernel/watchdog.c

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba3..4cf06a6 100644
@@ -164,6 +164,7 @@ enum cpuhp_state {
        CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
        CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
        CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+       CPUHP_AP_WATCHDOG_ONLINE,
        CPUHP_AP_WORKQUEUE_ONLINE,
        CPUHP_AP_RCUTREE_ONLINE,
        CPUHP_AP_ONLINE_DYN,
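
[Editorial note: the new state sits in the AP section of the enum, so its
callbacks run on the CPU that is coming up or going down, in enum order on
bring-up and reverse order on teardown. A fixed slot is needed because the
table entry in kernel/cpu.c (below) is static; a driver without ordering
constraints would instead take a dynamic slot, roughly as in this hedged
sketch with illustrative names:]

    #include <linux/cpuhotplug.h>

    static int demo_online(unsigned int cpu)  { return 0; }
    static int demo_offline(unsigned int cpu) { return 0; }

    static int __init demo_init(void)
    {
            /* For CPUHP_AP_ONLINE_DYN, cpuhp_setup_state() returns the
             * allocated state number (> 0) or a negative error, and runs
             * the startup callback on all currently online CPUs. */
            int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:online",
                                        demo_online, demo_offline);
            return ret < 0 ? ret : 0;
    }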
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b8d868d..80664bb 100644
@@ -33,10 +33,15 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif /* !CONFIG_SMP */
 
+extern int lockup_detector_online_cpu(unsigned int cpu);
+extern int lockup_detector_offline_cpu(unsigned int cpu);
+
 #else /* CONFIG_LOCKUP_DETECTOR */
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
 static inline void lockup_detector_cleanup(void) { }
+#define lockup_detector_online_cpu     NULL
+#define lockup_detector_offline_cpu    NULL
 #endif /* !CONFIG_LOCKUP_DETECTOR */
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
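
[Editorial note: when CONFIG_LOCKUP_DETECTOR is off, the two hooks are
#defined to NULL rather than stubbed as empty inline functions, because
they are used as function pointers in the static cpuhp_hp_states[] table
(next file) and the hotplug core treats a NULL callback as "nothing to
do". A userspace-compilable sketch of the same idiom, illustrative only:]

    #include <stdio.h>

    typedef int (*hook_fn)(unsigned int cpu);

    /* NULL entries are skipped, so a compiled-out feature costs one
     * NULL pointer instead of an #ifdef at every call site. */
    static int run_hook(hook_fn fn, unsigned int cpu)
    {
            return fn ? fn(cpu) : 0;
    }

    #ifdef WITH_DEMO
    static int demo_online(unsigned int cpu)
    {
            printf("cpu%u online\n", cpu);
            return 0;
    }
    #else
    #define demo_online NULL
    #endif

    int main(void)
    {
            return run_hook(demo_online, 0);
    }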
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0db8938..191097c 100644
@@ -1344,6 +1344,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
                .startup.single         = perf_event_init_cpu,
                .teardown.single        = perf_event_exit_cpu,
        },
+       [CPUHP_AP_WATCHDOG_ONLINE] = {
+               .name                   = "lockup_detector:online",
+               .startup.single         = lockup_detector_online_cpu,
+               .teardown.single        = lockup_detector_offline_cpu,
+       },
        [CPUHP_AP_WORKQUEUE_ONLINE] = {
                .name                   = "workqueue:online",
                .startup.single         = workqueue_online_cpu,
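
[Editorial note: on a kernel carrying this patch, the new state should be
visible at runtime via the hotplug states listing, along the lines of the
following; the numeric slot varies with the kernel configuration:]

    # grep lockup_detector /sys/devices/system/cpu/hotplug/states
    245: lockup_detector:online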
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 576d180..b81f777 100644
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
-#include <linux/smpboot.h>
-#include <linux/sched/rt.h>
-#include <uapi/linux/sched/types.h>
 #include <linux/tick.h>
-#include <linux/workqueue.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/isolation.h>
+#include <linux/stop_machine.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
-#include <linux/kthread.h>
 
 static DEFINE_MUTEX(watchdog_mutex);
 
@@ -169,11 +165,10 @@ static void lockup_detector_update_enable(void)
 unsigned int __read_mostly softlockup_panic =
                        CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
 
-static bool softlockup_threads_initialized __read_mostly;
+static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
@@ -335,6 +330,25 @@ static void watchdog_interrupt_count(void)
        __this_cpu_inc(hrtimer_interrupts);
 }
 
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every sample_period seconds (4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static int softlockup_fn(void *data)
+{
+       __this_cpu_write(soft_lockup_hrtimer_cnt,
+                        __this_cpu_read(hrtimer_interrupts));
+       __touch_watchdog();
+
+       return 0;
+}
+
+static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -350,7 +364,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        watchdog_interrupt_count();
 
        /* kick the softlockup detector */
-       wake_up_process(__this_cpu_read(softlockup_watchdog));
+       stop_one_cpu_nowait(smp_processor_id(),
+                       softlockup_fn, NULL,
+                       this_cpu_ptr(&softlockup_stop_work));
 
        /* .. and repeat */
        hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
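
[Editorial note: stop_one_cpu_nowait() only queues the work item and wakes
the stopper thread; it does not sleep, which is what makes it callable from
the hardirq context this hrtimer callback runs in. Reusing the per-CPU
cpu_stop_work buffer each period is safe in practice because, with a
multi-second sample_period, the previous invocation has completed long
before the timer fires again.]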
@@ -448,17 +464,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        return HRTIMER_RESTART;
 }
 
-static void watchdog_set_prio(unsigned int policy, unsigned int prio)
-{
-       struct sched_param param = { .sched_priority = prio };
-
-       sched_setscheduler(current, policy, &param);
-}
-
 static void watchdog_enable(unsigned int cpu)
 {
        struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 
+       WARN_ON_ONCE(cpu != smp_processor_id());
+
        /*
         * Start the timer first to prevent the NMI watchdog triggering
         * before the timer has a chance to fire.
@@ -473,15 +484,14 @@ static void watchdog_enable(unsigned int cpu)
        /* Enable the perf event */
        if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
                watchdog_nmi_enable(cpu);
-
-       watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
 }
 
 static void watchdog_disable(unsigned int cpu)
 {
        struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 
-       watchdog_set_prio(SCHED_NORMAL, 0);
+       WARN_ON_ONCE(cpu != smp_processor_id());
+
        /*
         * Disable the perf event first. That prevents that a large delay
         * between disabling the timer and disabling the perf event causes
@@ -491,77 +501,63 @@ static void watchdog_disable(unsigned int cpu)
        hrtimer_cancel(hrtimer);
 }
 
-static void watchdog_cleanup(unsigned int cpu, bool online)
+static int softlockup_stop_fn(void *data)
 {
-       watchdog_disable(cpu);
+       watchdog_disable(smp_processor_id());
+       return 0;
 }
 
-static int watchdog_should_run(unsigned int cpu)
+static void softlockup_stop_all(void)
 {
-       return __this_cpu_read(hrtimer_interrupts) !=
-               __this_cpu_read(soft_lockup_hrtimer_cnt);
+       int cpu;
+
+       if (!softlockup_initialized)
+               return;
+
+       for_each_cpu(cpu, &watchdog_allowed_mask)
+               smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
+
+       cpumask_clear(&watchdog_allowed_mask);
 }
 
-/*
- * The watchdog thread function - touches the timestamp.
- *
- * It only runs once every sample_period seconds (4 seconds by
- * default) to reset the softlockup timestamp. If this gets delayed
- * for more than 2*watchdog_thresh seconds then the debug-printout
- * triggers in watchdog_timer_fn().
- */
-static void watchdog(unsigned int cpu)
+static int softlockup_start_fn(void *data)
 {
-       __this_cpu_write(soft_lockup_hrtimer_cnt,
-                        __this_cpu_read(hrtimer_interrupts));
-       __touch_watchdog();
+       watchdog_enable(smp_processor_id());
+       return 0;
 }
 
-static struct smp_hotplug_thread watchdog_threads = {
-       .store                  = &softlockup_watchdog,
-       .thread_should_run      = watchdog_should_run,
-       .thread_fn              = watchdog,
-       .thread_comm            = "watchdog/%u",
-       .setup                  = watchdog_enable,
-       .cleanup                = watchdog_cleanup,
-       .park                   = watchdog_disable,
-       .unpark                 = watchdog_enable,
-};
-
-static void softlockup_update_smpboot_threads(void)
+static void softlockup_start_all(void)
 {
-       lockdep_assert_held(&watchdog_mutex);
-
-       if (!softlockup_threads_initialized)
-               return;
+       int cpu;
 
-       smpboot_update_cpumask_percpu_thread(&watchdog_threads,
-                                            &watchdog_allowed_mask);
+       cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
+       for_each_cpu(cpu, &watchdog_allowed_mask)
+               smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
 }
 
-/* Temporarily park all watchdog threads */
-static void softlockup_park_all_threads(void)
+int lockup_detector_online_cpu(unsigned int cpu)
 {
-       cpumask_clear(&watchdog_allowed_mask);
-       softlockup_update_smpboot_threads();
+       watchdog_enable(cpu);
+       return 0;
 }
 
-/* Unpark enabled threads */
-static void softlockup_unpark_threads(void)
+int lockup_detector_offline_cpu(unsigned int cpu)
 {
-       cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
-       softlockup_update_smpboot_threads();
+       watchdog_disable(cpu);
+       return 0;
 }
 
 static void lockup_detector_reconfigure(void)
 {
        cpus_read_lock();
        watchdog_nmi_stop();
-       softlockup_park_all_threads();
+
+       softlockup_stop_all();
        set_sample_period();
        lockup_detector_update_enable();
        if (watchdog_enabled && watchdog_thresh)
-               softlockup_unpark_threads();
+               softlockup_start_all();
+
        watchdog_nmi_start();
        cpus_read_unlock();
        /*
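
[Editorial note: the start/stop paths above run watchdog_enable() and
watchdog_disable() on each target CPU via smp_call_on_cpu(), which queues
a work item on that CPU and blocks until the function has returned; the
final argument (false here) is the variant that additionally pins to the
physical CPU for virtualized setups. A minimal sketch of the call pattern,
with illustrative names:]

    #include <linux/smp.h>

    static int demo_fn(void *data)
    {
            /* Runs in process context on the chosen CPU. */
            return 0;
    }

    static void demo_call_each(const struct cpumask *mask)
    {
            int cpu;

            /* Synchronously run demo_fn() on every CPU in @mask;
             * smp_call_on_cpu() returns demo_fn()'s return value. */
            for_each_cpu(cpu, mask)
                    smp_call_on_cpu(cpu, demo_fn, NULL, false);
    }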
@@ -580,8 +576,6 @@ static void lockup_detector_reconfigure(void)
  */
 static __init void lockup_detector_setup(void)
 {
-       int ret;
-
        /*
         * If sysctl is off and watchdog got disabled on the command line,
         * nothing to do here.
@@ -592,24 +586,13 @@ static __init void lockup_detector_setup(void)
            !(watchdog_enabled && watchdog_thresh))
                return;
 
-       ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
-                                                    &watchdog_allowed_mask);
-       if (ret) {
-               pr_err("Failed to initialize soft lockup detector threads\n");
-               return;
-       }
-
        mutex_lock(&watchdog_mutex);
-       softlockup_threads_initialized = true;
        lockup_detector_reconfigure();
+       softlockup_initialized = true;
        mutex_unlock(&watchdog_mutex);
 }
 
 #else /* CONFIG_SOFTLOCKUP_DETECTOR */
-static inline int watchdog_park_threads(void) { return 0; }
-static inline void watchdog_unpark_threads(void) { }
-static inline int watchdog_enable_all_cpus(void) { return 0; }
-static inline void watchdog_disable_all_cpus(void) { }
 static void lockup_detector_reconfigure(void)
 {
        cpus_read_lock();