#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
bool rollback;
bool single;
bool bringup;
+ bool booted_once;
struct hlist_node *node;
enum cpuhp_state cb_state;
int result;
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
+static struct lock_class_key cpuhp_state_key;
+static struct lockdep_map cpuhp_state_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+#endif
+
/**
* cpuhp_step - Hotplug state machine step
* @name: Name of the step
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif /* CONFIG_HOTPLUG_CPU */
+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { }
+
+#ifdef CONFIG_HOTPLUG_SMT
+enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+EXPORT_SYMBOL_GPL(cpu_smt_control);
+
+static bool cpu_smt_available __read_mostly;
+
+void __init cpu_smt_disable(bool force)
+{
+ if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
+ cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+ return;
+
+ if (force) {
+ pr_info("SMT: Force disabled\n");
+ cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+ } else {
+ cpu_smt_control = CPU_SMT_DISABLED;
+ }
+}
+
+/*
+ * The decision whether SMT is supported can only be done after the full
+ * CPU identification. Called from architecture code before non boot CPUs
+ * are brought up.
+ */
+void __init cpu_smt_check_topology_early(void)
+{
+ if (!topology_smt_supported())
+ cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
+/*
+ * If SMT was disabled by BIOS, detect it here, after the CPUs have been
+ * brought online. This ensures the smt/l1tf sysfs entries are consistent
+ * with reality. cpu_smt_available is set to true during the bringup of non
+ * boot CPUs when a SMT sibling is detected. Note, this may overwrite
+ * cpu_smt_control's previous setting.
+ */
+void __init cpu_smt_check_topology(void)
+{
+ if (!cpu_smt_available)
+ cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
+static int __init smt_cmdline_disable(char *str)
+{
+ cpu_smt_disable(str && !strcmp(str, "force"));
+ return 0;
+}
+early_param("nosmt", smt_cmdline_disable);
+
+static inline bool cpu_smt_allowed(unsigned int cpu)
+{
+ if (topology_is_primary_thread(cpu))
+ return true;
+
+ /*
+ * If the CPU is not a 'primary' thread and the booted_once bit is
+ * set then the processor has SMT support. Store this information
+ * for the late check of SMT support in cpu_smt_check_topology().
+ */
+ if (per_cpu(cpuhp_state, cpu).booted_once)
+ cpu_smt_available = true;
+
+ if (cpu_smt_control == CPU_SMT_ENABLED)
+ return true;
+
+ /*
+ * On x86 it's required to boot all logical CPUs at least once so
+ * that the init code can get a chance to set CR4.MCE on each
+ * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
+ * core will shutdown the machine.
+ */
+ return !per_cpu(cpuhp_state, cpu).booted_once;
+}
+#else
+static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+#endif
+
/* Need to know about CPUs going up/down? */
int register_cpu_notifier(struct notifier_block *nb)
{
return 0;
}
+static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st);
+
static int bringup_wait_for_ap(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
wait_for_completion(&st->done);
+ if (WARN_ON_ONCE((!cpu_online(cpu))))
+ return -ECANCELED;
+
+ /* Unpark the stopper thread and the hotplug thread of the target cpu */
+ stop_machine_unpark(cpu);
+ kthread_unpark(st->thread);
+
+ /*
+ * SMT soft disabling on X86 requires to bring the CPU out of the
+ * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
+ * CPU marked itself as booted_once in cpu_notify_starting() so the
+ * cpu_smt_allowed() check will now return false if this is not the
+ * primary sibling.
+ */
+ if (!cpu_smt_allowed(cpu))
+ return -ECANCELED;
+
+ /* Should we go further up ? */
+ if (st->target > CPUHP_AP_ONLINE_IDLE) {
+ __cpuhp_kick_ap_work(st);
+ wait_for_completion(&st->done);
+ }
return st->result;
}
cpu_notify(CPU_UP_CANCELED, cpu);
return ret;
}
- ret = bringup_wait_for_ap(cpu);
- BUG_ON(!cpu_online(cpu));
- return ret;
+ return bringup_wait_for_ap(cpu);
}
/*
}
}
+static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
+{
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ return true;
+ /*
+ * When CPU hotplug is disabled, then taking the CPU down is not
+ * possible because takedown_cpu() and the architecture and
+ * subsystem specific mechanisms are not available. So the CPU
+ * which would be completely unplugged again needs to stay around
+ * in the current state.
+ */
+ return st->state <= CPUHP_BRINGUP_CPU;
+}
+
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
st->state++;
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
if (ret) {
- st->target = prev_state;
- undo_cpu_up(cpu, st);
+ if (can_rollback_cpu(st)) {
+ st->target = prev_state;
+ undo_cpu_up(cpu, st);
+ }
break;
}
}
st->should_run = false;
+ lock_map_acquire(&cpuhp_state_lock_map);
/* Single callback invocation for [un]install ? */
if (st->single) {
if (st->cb_state < CPUHP_AP_ONLINE) {
else if (st->state > st->target)
ret = cpuhp_ap_offline(cpu, st);
}
+ lock_map_release(&cpuhp_state_lock_map);
st->result = ret;
complete(&st->done);
}
if (!cpu_online(cpu))
return 0;
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
+
/*
* If we are up and running, use the hotplug thread. For early calls
* we invoke the thread function directly.
enum cpuhp_state state = st->state;
trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
+ lock_map_acquire(&cpuhp_state_lock_map);
+ lock_map_release(&cpuhp_state_lock_map);
__cpuhp_kick_ap_work(st);
wait_for_completion(&st->done);
trace_cpuhp_exit(cpu, st->state, state, st->result);
kthread_unpark(this_cpu_read(cpuhp_state.thread));
}
-#ifdef CONFIG_HOTPLUG_CPU
EXPORT_SYMBOL(register_cpu_notifier);
EXPORT_SYMBOL(__register_cpu_notifier);
void unregister_cpu_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(__unregister_cpu_notifier);
+#ifdef CONFIG_HOTPLUG_CPU
/**
* clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
* @cpu: a CPU id
/* Park the smpboot threads */
kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
- smpboot_park_threads(cpu);
/*
* Prevent irq alloc/free while the dying cpu reorganizes the
/* This post dead nonsense must die */
if (!ret && hasdied)
cpu_notify_nofail(CPU_POST_DEAD, cpu);
+ arch_smt_update();
return ret;
}
+static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
+{
+ if (cpu_hotplug_disabled)
+ return -EBUSY;
+ return _cpu_down(cpu, 0, target);
+}
+
static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
int err;
cpu_maps_update_begin();
-
- if (cpu_hotplug_disabled) {
- err = -EBUSY;
- goto out;
- }
-
- err = _cpu_down(cpu, 0, target);
-
-out:
+ err = cpu_down_maps_locked(cpu, target);
cpu_maps_update_done();
return err;
}
enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
+ st->booted_once = true;
while (st->state < target) {
st->state++;
cpuhp_invoke_callback(cpu, st->state, true, NULL);
}
/*
- * Called from the idle task. We need to set active here, so we can kick off
- * the stopper thread and unpark the smpboot threads. If the target state is
- * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
- * cpu further.
+ * Called from the idle task. Wake up the controlling task which brings the
+ * stopper and the hotplug thread of the upcoming CPU up and then delegates
+ * the rest of the online bringup to the hotplug thread.
*/
void cpuhp_online_idle(enum cpuhp_state state)
{
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
- unsigned int cpu = smp_processor_id();
/* Happens for the boot cpu */
if (state != CPUHP_AP_ONLINE_IDLE)
return;
st->state = CPUHP_AP_ONLINE_IDLE;
-
- /* Unpark the stopper thread and the hotplug thread of this cpu */
- stop_machine_unpark(cpu);
- kthread_unpark(st->thread);
-
- /* Should we go further up ? */
- if (st->target > CPUHP_AP_ONLINE_IDLE)
- __cpuhp_kick_ap_work(st);
- else
- complete(&st->done);
+ complete(&st->done);
}
/* Requires cpu_add_remove_lock to be held */
ret = cpuhp_up_callbacks(cpu, st, target);
out:
cpu_hotplug_done();
+ arch_smt_update();
return ret;
}
err = -EBUSY;
goto out;
}
+ if (!cpu_smt_allowed(cpu)) {
+ err = -EPERM;
+ goto out;
+ }
err = _cpu_up(cpu, 0, target);
out:
* before blk_mq_queue_reinit_notify() from notify_dead(),
* otherwise a RCU stall occurs.
*/
- [CPUHP_TIMERS_DEAD] = {
+ [CPUHP_TIMERS_PREPARE] = {
.name = "timers:dead",
- .startup.single = NULL,
+ .startup.single = timers_prepare_cpu,
.teardown.single = timers_dead_cpu,
},
/* Kicks the plugged cpu into life */
.teardown.single = NULL,
.cant_stop = true,
},
- [CPUHP_AP_SMPCFD_DYING] = {
- .name = "smpcfd:dying",
- .startup.single = NULL,
- .teardown.single = smpcfd_dying_cpu,
- },
/*
* Handled on controll processor until the plugged processor manages
* this itself.
.startup.single = NULL,
.teardown.single = rcutree_dying_cpu,
},
+ [CPUHP_AP_SMPCFD_DYING] = {
+ .name = "smpcfd:dying",
+ .startup.single = NULL,
+ .teardown.single = smpcfd_dying_cpu,
+ },
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
[CPUHP_AP_SMPBOOT_THREADS] = {
.name = "smpboot/threads:online",
.startup.single = smpboot_unpark_threads,
- .teardown.single = NULL,
+ .teardown.single = smpboot_park_threads,
},
[CPUHP_AP_PERF_ONLINE] = {
.name = "perf:online",
/* (Un)Install the callbacks for further cpu hotplug operations */
struct cpuhp_step *sp;
- mutex_lock(&cpuhp_state_mutex);
sp = cpuhp_get_step(state);
sp->startup.single = startup;
sp->teardown.single = teardown;
sp->name = name;
sp->multi_instance = multi_instance;
INIT_HLIST_HEAD(&sp->list);
- mutex_unlock(&cpuhp_state_mutex);
}
static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
{
enum cpuhp_state i;
- mutex_lock(&cpuhp_state_mutex);
for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
if (cpuhp_ap_states[i].name)
continue;
cpuhp_ap_states[i].name = "Reserved";
- mutex_unlock(&cpuhp_state_mutex);
return i;
}
- mutex_unlock(&cpuhp_state_mutex);
WARN(1, "No more dynamic states available for CPU hotplug\n");
return -ENOSPC;
}
return -EINVAL;
get_online_cpus();
+ mutex_lock(&cpuhp_state_mutex);
if (!invoke || !sp->startup.multi)
goto add_node;
}
add_node:
ret = 0;
- mutex_lock(&cpuhp_state_mutex);
hlist_add_head(node, &sp->list);
- mutex_unlock(&cpuhp_state_mutex);
err:
+ mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
return ret;
}
return -EINVAL;
get_online_cpus();
+ mutex_lock(&cpuhp_state_mutex);
/* currently assignments for the ONLINE state are possible */
if (state == CPUHP_AP_ONLINE_DYN) {
}
}
out:
+ mutex_unlock(&cpuhp_state_mutex);
+
put_online_cpus();
if (!ret && dyn_state)
return state;
return -EINVAL;
get_online_cpus();
+ mutex_lock(&cpuhp_state_mutex);
+
if (!invoke || !cpuhp_get_teardown_cb(state))
goto remove;
/*
}
remove:
- mutex_lock(&cpuhp_state_mutex);
hlist_del(node);
mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
BUG_ON(cpuhp_cb_check(state));
get_online_cpus();
+ mutex_lock(&cpuhp_state_mutex);
if (sp->multi_instance) {
WARN(!hlist_empty(&sp->list),
}
remove:
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
+ mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
mutex_unlock(&cpuhp_state_mutex);
if (ret)
- return ret;
+ goto out;
if (st->state < target)
ret = do_cpu_up(dev->id, target);
else
ret = do_cpu_down(dev->id, target);
-
+out:
unlock_device_hotplug();
return ret ? ret : count;
}
NULL
};
+#ifdef CONFIG_HOTPLUG_SMT
+
+static const char *smt_states[] = {
+ [CPU_SMT_ENABLED] = "on",
+ [CPU_SMT_DISABLED] = "off",
+ [CPU_SMT_FORCE_DISABLED] = "forceoff",
+ [CPU_SMT_NOT_SUPPORTED] = "notsupported",
+};
+
+static ssize_t
+show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
+}
+
+static void cpuhp_offline_cpu_device(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);
+
+ dev->offline = true;
+ /* Tell user space about the state change */
+ kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
+}
+
+static void cpuhp_online_cpu_device(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);
+
+ dev->offline = false;
+ /* Tell user space about the state change */
+ kobject_uevent(&dev->kobj, KOBJ_ONLINE);
+}
+
+int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
+{
+ int cpu, ret = 0;
+
+ cpu_maps_update_begin();
+ for_each_online_cpu(cpu) {
+ if (topology_is_primary_thread(cpu))
+ continue;
+ ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
+ if (ret)
+ break;
+ /*
+ * As this needs to hold the cpu maps lock it's impossible
+ * to call device_offline() because that ends up calling
+ * cpu_down() which takes cpu maps lock. cpu maps lock
+ * needs to be held as this might race against in kernel
+ * abusers of the hotplug machinery (thermal management).
+ *
+ * So nothing would update device:offline state. That would
+ * leave the sysfs entry stale and prevent onlining after
+ * smt control has been changed to 'off' again. This is
+ * called under the sysfs hotplug lock, so it is properly
+ * serialized against the regular offline usage.
+ */
+ cpuhp_offline_cpu_device(cpu);
+ }
+ if (!ret) {
+ cpu_smt_control = ctrlval;
+ arch_smt_update();
+ }
+ cpu_maps_update_done();
+ return ret;
+}
+
+int cpuhp_smt_enable(void)
+{
+ int cpu, ret = 0;
+
+ cpu_maps_update_begin();
+ cpu_smt_control = CPU_SMT_ENABLED;
+ arch_smt_update();
+ for_each_present_cpu(cpu) {
+ /* Skip online CPUs and CPUs on offline nodes */
+ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
+ continue;
+ ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
+ if (ret)
+ break;
+ /* See comment in cpuhp_smt_disable() */
+ cpuhp_online_cpu_device(cpu);
+ }
+ cpu_maps_update_done();
+ return ret;
+}
+
+static ssize_t
+store_smt_control(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ctrlval, ret;
+
+ if (sysfs_streq(buf, "on"))
+ ctrlval = CPU_SMT_ENABLED;
+ else if (sysfs_streq(buf, "off"))
+ ctrlval = CPU_SMT_DISABLED;
+ else if (sysfs_streq(buf, "forceoff"))
+ ctrlval = CPU_SMT_FORCE_DISABLED;
+ else
+ return -EINVAL;
+
+ if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
+ return -EPERM;
+
+ if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+ return -ENODEV;
+
+ ret = lock_device_hotplug_sysfs();
+ if (ret)
+ return ret;
+
+ if (ctrlval != cpu_smt_control) {
+ switch (ctrlval) {
+ case CPU_SMT_ENABLED:
+ ret = cpuhp_smt_enable();
+ break;
+ case CPU_SMT_DISABLED:
+ case CPU_SMT_FORCE_DISABLED:
+ ret = cpuhp_smt_disable(ctrlval);
+ break;
+ }
+ }
+
+ unlock_device_hotplug();
+ return ret ? ret : count;
+}
+static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
+
+static ssize_t
+show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ bool active = topology_max_smt_threads() > 1;
+
+ return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
+}
+static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
+
+static struct attribute *cpuhp_smt_attrs[] = {
+ &dev_attr_control.attr,
+ &dev_attr_active.attr,
+ NULL
+};
+
+static const struct attribute_group cpuhp_smt_attr_group = {
+ .attrs = cpuhp_smt_attrs,
+ .name = "smt",
+ NULL
+};
+
+static int __init cpu_smt_state_init(void)
+{
+ return sysfs_create_group(&cpu_subsys.dev_root->kobj,
+ &cpuhp_smt_attr_group);
+}
+
+#else
+static inline int cpu_smt_state_init(void) { return 0; }
+#endif
+
static int __init cpuhp_sysfs_init(void)
{
int cpu, ret;
+ ret = cpu_smt_state_init();
+ if (ret)
+ return ret;
+
ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
&cpuhp_cpu_root_attr_group);
if (ret)
/*
* Must be called _AFTER_ setting up the per_cpu areas
*/
-void __init boot_cpu_state_init(void)
+void __init boot_cpu_hotplug_init(void)
+{
+#ifdef CONFIG_SMP
+ this_cpu_write(cpuhp_state.booted_once, true);
+#endif
+ this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
+}
+
+enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
+
+static int __init mitigations_parse_cmdline(char *arg)
{
- per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
+ if (!strcmp(arg, "off"))
+ cpu_mitigations = CPU_MITIGATIONS_OFF;
+ else if (!strcmp(arg, "auto"))
+ cpu_mitigations = CPU_MITIGATIONS_AUTO;
+ else if (!strcmp(arg, "auto,nosmt"))
+ cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
+ else
+ pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
+ arg);
+
+ return 0;
}
+early_param("mitigations", mitigations_parse_cmdline);