OSDN Git Service

cgroup: Disable IRQs while holding css_set_lock
authorDaniel Bristot de Oliveira <bristot@redhat.com>
Wed, 22 Jun 2016 20:28:41 +0000 (17:28 -0300)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 6 Sep 2019 08:18:11 +0000 (10:18 +0200)
commit 82d6489d0fed2ec8a8c48c19e8d8a04ac8e5bb26 upstream.

While testing the deadline scheduler + cgroup setup I hit this
warning.

[  132.612935] ------------[ cut here ]------------
[  132.612951] WARNING: CPU: 5 PID: 0 at kernel/softirq.c:150 __local_bh_enable_ip+0x6b/0x80
[  132.612952] Modules linked in: (a ton of modules...)
[  132.612981] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.7.0-rc2 #2
[  132.612981] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.2-20150714_191134- 04/01/2014
[  132.612982]  0000000000000086 45c8bb5effdd088b ffff88013fd43da0 ffffffff813d229e
[  132.612984]  0000000000000000 0000000000000000 ffff88013fd43de0 ffffffff810a652b
[  132.612985]  00000096811387b5 0000000000000200 ffff8800bab29d80 ffff880034c54c00
[  132.612986] Call Trace:
[  132.612987]  <IRQ>  [<ffffffff813d229e>] dump_stack+0x63/0x85
[  132.612994]  [<ffffffff810a652b>] __warn+0xcb/0xf0
[  132.612997]  [<ffffffff810e76a0>] ? push_dl_task.part.32+0x170/0x170
[  132.612999]  [<ffffffff810a665d>] warn_slowpath_null+0x1d/0x20
[  132.613000]  [<ffffffff810aba5b>] __local_bh_enable_ip+0x6b/0x80
[  132.613008]  [<ffffffff817d6c8a>] _raw_write_unlock_bh+0x1a/0x20
[  132.613010]  [<ffffffff817d6c9e>] _raw_spin_unlock_bh+0xe/0x10
[  132.613015]  [<ffffffff811388ac>] put_css_set+0x5c/0x60
[  132.613016]  [<ffffffff8113dc7f>] cgroup_free+0x7f/0xa0
[  132.613017]  [<ffffffff810a3912>] __put_task_struct+0x42/0x140
[  132.613018]  [<ffffffff810e776a>] dl_task_timer+0xca/0x250
[  132.613027]  [<ffffffff810e76a0>] ? push_dl_task.part.32+0x170/0x170
[  132.613030]  [<ffffffff8111371e>] __hrtimer_run_queues+0xee/0x270
[  132.613031]  [<ffffffff81113ec8>] hrtimer_interrupt+0xa8/0x190
[  132.613034]  [<ffffffff81051a58>] local_apic_timer_interrupt+0x38/0x60
[  132.613035]  [<ffffffff817d9b0d>] smp_apic_timer_interrupt+0x3d/0x50
[  132.613037]  [<ffffffff817d7c5c>] apic_timer_interrupt+0x8c/0xa0
[  132.613038]  <EOI>  [<ffffffff81063466>] ? native_safe_halt+0x6/0x10
[  132.613043]  [<ffffffff81037a4e>] default_idle+0x1e/0xd0
[  132.613044]  [<ffffffff810381cf>] arch_cpu_idle+0xf/0x20
[  132.613046]  [<ffffffff810e8fda>] default_idle_call+0x2a/0x40
[  132.613047]  [<ffffffff810e92d7>] cpu_startup_entry+0x2e7/0x340
[  132.613048]  [<ffffffff81050235>] start_secondary+0x155/0x190
[  132.613049] ---[ end trace f91934d162ce9977 ]---

The warn is the spin_(lock|unlock)_bh(&css_set_lock) in the interrupt
context. Converting the spin_lock_bh to spin_lock_irq(save) to avoid
this problem - and other problems of sharing a spinlock with an
interrupt.

Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: cgroups@vger.kernel.org
Cc: stable@vger.kernel.org # 4.5+
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: "Luis Claudio R. Goncalves" <lgoncalv@redhat.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Zubin Mithra <zsm@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
kernel/cgroup.c

index 5299618..7a7c535 100644 (file)
@@ -784,6 +784,8 @@ static void put_css_set_locked(struct css_set *cset)
 
 static void put_css_set(struct css_set *cset)
 {
+       unsigned long flags;
+
        /*
         * Ensure that the refcount doesn't hit zero while any readers
         * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -792,9 +794,9 @@ static void put_css_set(struct css_set *cset)
        if (atomic_add_unless(&cset->refcount, -1, 1))
                return;
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irqsave(&css_set_lock, flags);
        put_css_set_locked(cset);
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irqrestore(&css_set_lock, flags);
 }
 
 /*
@@ -1017,11 +1019,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 
        /* First see if we already have a cgroup group that matches
         * the desired set */
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        cset = find_existing_css_set(old_cset, cgrp, template);
        if (cset)
                get_css_set(cset);
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        if (cset)
                return cset;
@@ -1049,7 +1051,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
         * find_existing_css_set() */
        memcpy(cset->subsys, template, sizeof(cset->subsys));
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        /* Add reference counts and links from the new css_set. */
        list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
                struct cgroup *c = link->cgrp;
@@ -1075,7 +1077,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
                css_get(css);
        }
 
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        return cset;
 }
@@ -1139,7 +1141,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
         * Release all the links from cset_links to this hierarchy's
         * root cgroup
         */
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
 
        list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
                list_del(&link->cset_link);
@@ -1147,7 +1149,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
                kfree(link);
        }
 
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        if (!list_empty(&root->root_list)) {
                list_del(&root->root_list);
@@ -1551,11 +1553,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
                ss->root = dst_root;
                css->cgroup = dcgrp;
 
-               spin_lock_bh(&css_set_lock);
+               spin_lock_irq(&css_set_lock);
                hash_for_each(css_set_table, i, cset, hlist)
                        list_move_tail(&cset->e_cset_node[ss->id],
                                       &dcgrp->e_csets[ss->id]);
-               spin_unlock_bh(&css_set_lock);
+               spin_unlock_irq(&css_set_lock);
 
                src_root->subsys_mask &= ~(1 << ssid);
                scgrp->subtree_control &= ~(1 << ssid);
@@ -1832,7 +1834,7 @@ static void cgroup_enable_task_cg_lists(void)
 {
        struct task_struct *p, *g;
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
 
        if (use_task_css_set_links)
                goto out_unlock;
@@ -1857,8 +1859,12 @@ static void cgroup_enable_task_cg_lists(void)
                 * entry won't be deleted though the process has exited.
                 * Do it while holding siglock so that we don't end up
                 * racing against cgroup_exit().
+                *
+                * Interrupts were already disabled while acquiring
+                * the css_set_lock, so we do not need to disable it
+                * again when acquiring the sighand->siglock here.
                 */
-               spin_lock_irq(&p->sighand->siglock);
+               spin_lock(&p->sighand->siglock);
                if (!(p->flags & PF_EXITING)) {
                        struct css_set *cset = task_css_set(p);
 
@@ -1867,11 +1873,11 @@ static void cgroup_enable_task_cg_lists(void)
                        list_add_tail(&p->cg_list, &cset->tasks);
                        get_css_set(cset);
                }
-               spin_unlock_irq(&p->sighand->siglock);
+               spin_unlock(&p->sighand->siglock);
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);
 out_unlock:
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 }
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -1976,13 +1982,13 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
         * Link the root cgroup in this hierarchy into all the css_set
         * objects.
         */
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        hash_for_each(css_set_table, i, cset, hlist) {
                link_css_set(&tmp_links, cset, root_cgrp);
                if (css_set_populated(cset))
                        cgroup_update_populated(root_cgrp, true);
        }
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        BUG_ON(!list_empty(&root_cgrp->self.children));
        BUG_ON(atomic_read(&root->nr_cgrps) != 1);
@@ -2215,7 +2221,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
        char *path = NULL;
 
        mutex_lock(&cgroup_mutex);
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
 
        root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
 
@@ -2228,7 +2234,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
                        path = buf;
        }
 
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
        mutex_unlock(&cgroup_mutex);
        return path;
 }
@@ -2403,7 +2409,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
         * the new cgroup.  There are no failure cases after here, so this
         * is the commit point.
         */
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        list_for_each_entry(cset, &tset->src_csets, mg_node) {
                list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
                        struct css_set *from_cset = task_css_set(task);
@@ -2414,7 +2420,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
                        put_css_set_locked(from_cset);
                }
        }
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        /*
         * Migration is committed, all target tasks are now on dst_csets.
@@ -2443,13 +2449,13 @@ out_cancel_attach:
                }
        }
 out_release_tset:
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        list_splice_init(&tset->dst_csets, &tset->src_csets);
        list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
                list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
                list_del_init(&cset->mg_node);
        }
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
        return ret;
 }
 
@@ -2466,14 +2472,14 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 
        lockdep_assert_held(&cgroup_mutex);
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
                cset->mg_src_cgrp = NULL;
                cset->mg_dst_cset = NULL;
                list_del_init(&cset->mg_preload_node);
                put_css_set_locked(cset);
        }
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 }
 
 /**
@@ -2623,7 +2629,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
         * already PF_EXITING could be freed from underneath us unless we
         * take an rcu_read_lock.
         */
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        rcu_read_lock();
        task = leader;
        do {
@@ -2632,7 +2638,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
                        break;
        } while_each_thread(leader, task);
        rcu_read_unlock();
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        return cgroup_taskset_migrate(&tset, cgrp);
 }
@@ -2653,7 +2659,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
        int ret;
 
        /* look up all src csets */
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        rcu_read_lock();
        task = leader;
        do {
@@ -2663,7 +2669,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
                        break;
        } while_each_thread(leader, task);
        rcu_read_unlock();
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        /* prepare dst csets and commit */
        ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
@@ -2696,9 +2702,9 @@ static int cgroup_procs_write_permission(struct task_struct *task,
                struct cgroup *cgrp;
                struct inode *inode;
 
-               spin_lock_bh(&css_set_lock);
+               spin_lock_irq(&css_set_lock);
                cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
-               spin_unlock_bh(&css_set_lock);
+               spin_unlock_irq(&css_set_lock);
 
                while (!cgroup_is_descendant(dst_cgrp, cgrp))
                        cgrp = cgroup_parent(cgrp);
@@ -2800,9 +2806,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
                if (root == &cgrp_dfl_root)
                        continue;
 
-               spin_lock_bh(&css_set_lock);
+               spin_lock_irq(&css_set_lock);
                from_cgrp = task_cgroup_from_root(from, root);
-               spin_unlock_bh(&css_set_lock);
+               spin_unlock_irq(&css_set_lock);
 
                retval = cgroup_attach_task(from_cgrp, tsk, false);
                if (retval)
@@ -2927,7 +2933,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
        percpu_down_write(&cgroup_threadgroup_rwsem);
 
        /* look up all csses currently attached to @cgrp's subtree */
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
                struct cgrp_cset_link *link;
 
@@ -2939,14 +2945,14 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
                        cgroup_migrate_add_src(link->cset, cgrp,
                                               &preloaded_csets);
        }
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        /* NULL dst indicates self on default hierarchy */
        ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
        if (ret)
                goto out_finish;
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
                struct task_struct *task, *ntask;
 
@@ -2958,7 +2964,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
                list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
                        cgroup_taskset_add(task, &tset);
        }
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        ret = cgroup_taskset_migrate(&tset, cgrp);
 out_finish:
@@ -3641,10 +3647,10 @@ static int cgroup_task_count(const struct cgroup *cgrp)
        int count = 0;
        struct cgrp_cset_link *link;
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        list_for_each_entry(link, &cgrp->cset_links, cset_link)
                count += atomic_read(&link->cset->refcount);
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
        return count;
 }
 
@@ -3982,7 +3988,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
        memset(it, 0, sizeof(*it));
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
 
        it->ss = css->ss;
 
@@ -3995,7 +4001,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
        css_task_iter_advance_css_set(it);
 
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 }
 
 /**
@@ -4013,7 +4019,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
                it->cur_task = NULL;
        }
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
 
        if (it->task_pos) {
                it->cur_task = list_entry(it->task_pos, struct task_struct,
@@ -4022,7 +4028,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
                css_task_iter_advance(it);
        }
 
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        return it->cur_task;
 }
@@ -4036,10 +4042,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 void css_task_iter_end(struct css_task_iter *it)
 {
        if (it->cur_cset) {
-               spin_lock_bh(&css_set_lock);
+               spin_lock_irq(&css_set_lock);
                list_del(&it->iters_node);
                put_css_set_locked(it->cur_cset);
-               spin_unlock_bh(&css_set_lock);
+               spin_unlock_irq(&css_set_lock);
        }
 
        if (it->cur_task)
@@ -4068,10 +4074,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
        mutex_lock(&cgroup_mutex);
 
        /* all tasks in @from are being moved, all csets are source */
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        list_for_each_entry(link, &from->cset_links, cset_link)
                cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        ret = cgroup_migrate_prepare_dst(to, &preloaded_csets);
        if (ret)
@@ -5180,10 +5186,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
         */
        cgrp->self.flags &= ~CSS_ONLINE;
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        list_for_each_entry(link, &cgrp->cset_links, cset_link)
                link->cset->dead = true;
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
 
        /* initiate massacre of all css's */
        for_each_css(css, ssid, cgrp)
@@ -5436,7 +5442,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                goto out;
 
        mutex_lock(&cgroup_mutex);
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
 
        for_each_root(root) {
                struct cgroup_subsys *ss;
@@ -5488,7 +5494,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 
        retval = 0;
 out_unlock:
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
        mutex_unlock(&cgroup_mutex);
        kfree(buf);
 out:
@@ -5649,13 +5655,13 @@ void cgroup_post_fork(struct task_struct *child,
        if (use_task_css_set_links) {
                struct css_set *cset;
 
-               spin_lock_bh(&css_set_lock);
+               spin_lock_irq(&css_set_lock);
                cset = task_css_set(current);
                if (list_empty(&child->cg_list)) {
                        get_css_set(cset);
                        css_set_move_task(child, NULL, cset, false);
                }
-               spin_unlock_bh(&css_set_lock);
+               spin_unlock_irq(&css_set_lock);
        }
 
        /*
@@ -5699,9 +5705,9 @@ void cgroup_exit(struct task_struct *tsk)
        cset = task_css_set(tsk);
 
        if (!list_empty(&tsk->cg_list)) {
-               spin_lock_bh(&css_set_lock);
+               spin_lock_irq(&css_set_lock);
                css_set_move_task(tsk, cset, NULL, false);
-               spin_unlock_bh(&css_set_lock);
+               spin_unlock_irq(&css_set_lock);
        } else {
                get_css_set(cset);
        }
@@ -5914,7 +5920,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
        if (!name_buf)
                return -ENOMEM;
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        rcu_read_lock();
        cset = rcu_dereference(current->cgroups);
        list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
@@ -5925,7 +5931,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
                           c->root->hierarchy_id, name_buf);
        }
        rcu_read_unlock();
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
        kfree(name_buf);
        return 0;
 }
@@ -5936,7 +5942,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
        struct cgroup_subsys_state *css = seq_css(seq);
        struct cgrp_cset_link *link;
 
-       spin_lock_bh(&css_set_lock);
+       spin_lock_irq(&css_set_lock);
        list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
                struct css_set *cset = link->cset;
                struct task_struct *task;
@@ -5959,7 +5965,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
        overflow:
                seq_puts(seq, "  ...\n");
        }
-       spin_unlock_bh(&css_set_lock);
+       spin_unlock_irq(&css_set_lock);
        return 0;
 }