OSDN Git Service

memcg: simplify mem_cgroup_iter
[uclinux-h8/linux.git] / mm / memcontrol.c
index 53b8201..614d0d9 100644 (file)
@@ -152,8 +152,13 @@ struct mem_cgroup_stat_cpu {
 };
 
 struct mem_cgroup_reclaim_iter {
-       /* css_id of the last scanned hierarchy member */
-       int position;
+       /*
+        * last scanned hierarchy member. Valid only if last_dead_count
+        * matches memcg->dead_count of the hierarchy root group.
+        */
+       struct mem_cgroup *last_visited;
+       unsigned long last_dead_count;
+
        /* scan generation, increased every round-trip */
        unsigned int generation;
 };
@@ -335,6 +340,7 @@ struct mem_cgroup {
        struct mem_cgroup_stat_cpu nocpu_base;
        spinlock_t pcp_counter_lock;
 
+       atomic_t        dead_count;
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
        struct tcp_memcontrol tcp_mem;
 #endif
@@ -1089,7 +1095,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                                   struct mem_cgroup_reclaim_cookie *reclaim)
 {
        struct mem_cgroup *memcg = NULL;
-       int id = 0;
+       struct mem_cgroup *last_visited = NULL;
+       unsigned long uninitialized_var(dead_count);
 
        if (mem_cgroup_disabled())
                return NULL;
@@ -1098,20 +1105,17 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                root = root_mem_cgroup;
 
        if (prev && !reclaim)
-               id = css_id(&prev->css);
-
-       if (prev && prev != root)
-               css_put(&prev->css);
+               last_visited = prev;
 
        if (!root->use_hierarchy && root != root_mem_cgroup) {
                if (prev)
-                       return NULL;
+                       goto out_css_put;
                return root;
        }
 
+       rcu_read_lock();
        while (!memcg) {
                struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
-               struct cgroup_subsys_state *css;
 
                if (reclaim) {
                        int nid = zone_to_nid(reclaim->zone);
@@ -1120,31 +1124,92 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 
                        mz = mem_cgroup_zoneinfo(root, nid, zid);
                        iter = &mz->reclaim_iter[reclaim->priority];
-                       if (prev && reclaim->generation != iter->generation)
-                               return NULL;
-                       id = iter->position;
+                       last_visited = iter->last_visited;
+                       if (prev && reclaim->generation != iter->generation) {
+                               iter->last_visited = NULL;
+                               goto out_unlock;
+                       }
+
+                       /*
+                        * If the dead_count mismatches, a destruction
+                        * has happened or is happening concurrently.
+                        * If the dead_count matches, a destruction
+                        * might still happen concurrently, but since
+                        * we checked under RCU, that destruction
+                        * won't free the object until we release the
+                        * RCU reader lock.  Thus, the dead_count
+                        * check verifies the pointer is still valid,
+                        * css_tryget() verifies the cgroup pointed to
+                        * is alive.
+                        */
+                       dead_count = atomic_read(&root->dead_count);
+                       smp_rmb();
+                       last_visited = iter->last_visited;
+                       if (last_visited) {
+                               if ((dead_count != iter->last_dead_count) ||
+                                       !css_tryget(&last_visited->css)) {
+                                       last_visited = NULL;
+                               }
+                       }
                }
 
-               rcu_read_lock();
-               css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id);
-               if (css) {
-                       if (css == &root->css || css_tryget(css))
-                               memcg = mem_cgroup_from_css(css);
-               } else
-                       id = 0;
-               rcu_read_unlock();
+               /*
+                * Root is not visited by cgroup iterators so it needs an
+                * explicit visit.
+                */
+               if (!last_visited) {
+                       memcg = root;
+               } else {
+                       struct cgroup *prev_cgroup, *next_cgroup;
+
+                       prev_cgroup = (last_visited == root) ? NULL
+                               : last_visited->css.cgroup;
+skip_node:
+                       next_cgroup = cgroup_next_descendant_pre(
+                                       prev_cgroup, root->css.cgroup);
+
+                       /*
+                        * Even if we found a group we have to make sure it is
+                        * alive. css && !memcg means that the group should be
+                        * skipped and we should continue the tree walk.
+                        * last_visited css is safe to use because it is
+                        * protected by css_get and the tree walk is rcu safe.
+                        */
+                       if (next_cgroup) {
+                               struct mem_cgroup *mem = mem_cgroup_from_cont(
+                                               next_cgroup);
+                               if (css_tryget(&mem->css))
+                                       memcg = mem;
+                               else {
+                                       prev_cgroup = next_cgroup;
+                                       goto skip_node;
+                               }
+                       }
+               }
 
                if (reclaim) {
-                       iter->position = id;
-                       if (!css)
+                       if (last_visited)
+                               css_put(&last_visited->css);
+
+                       iter->last_visited = memcg;
+                       smp_wmb();
+                       iter->last_dead_count = dead_count;
+
+                       if (!memcg)
                                iter->generation++;
                        else if (!prev && memcg)
                                reclaim->generation = iter->generation;
                }
 
-               if (prev && !css)
-                       return NULL;
+               if (prev && !memcg)
+                       goto out_unlock;
        }
+out_unlock:
+       rcu_read_unlock();
+out_css_put:
+       if (prev && prev != root)
+               css_put(&prev->css);
+
        return memcg;
 }
 
@@ -3012,6 +3077,8 @@ void memcg_update_array_size(int num)
                memcg_limited_groups_array_size = memcg_caches_array_size(num);
 }
 
+static void kmem_cache_destroy_work_func(struct work_struct *w);
+
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 {
        struct memcg_cache_params *cur_params = s->memcg_params;
@@ -3031,6 +3098,8 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
                        return -ENOMEM;
                }
 
+               INIT_WORK(&s->memcg_params->destroy,
+                               kmem_cache_destroy_work_func);
                s->memcg_params->is_root_cache = true;
 
                /*
@@ -3078,6 +3147,8 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
        if (!s->memcg_params)
                return -ENOMEM;
 
+       INIT_WORK(&s->memcg_params->destroy,
+                       kmem_cache_destroy_work_func);
        if (memcg) {
                s->memcg_params->memcg = memcg;
                s->memcg_params->root_cache = root_cache;
@@ -3358,8 +3429,6 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
        list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
                cachep = memcg_params_to_cache(params);
                cachep->memcg_params->dead = true;
-               INIT_WORK(&cachep->memcg_params->destroy,
-                                 kmem_cache_destroy_work_func);
                schedule_work(&cachep->memcg_params->destroy);
        }
        mutex_unlock(&memcg->slab_caches_mutex);
@@ -6180,10 +6249,29 @@ mem_cgroup_css_online(struct cgroup *cont)
        return error;
 }
 
+/*
+ * Notify all ancestors that a group from their hierarchy is gone.
+ */
+static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
+{
+       struct mem_cgroup *parent = memcg;
+
+       while ((parent = parent_mem_cgroup(parent)))
+               atomic_inc(&parent->dead_count);
+
+       /*
+        * if the root memcg is not hierarchical we have to check it
+        * explicitly.
+        */
+       if (!root_mem_cgroup->use_hierarchy)
+               atomic_inc(&root_mem_cgroup->dead_count);
+}
+
 static void mem_cgroup_css_offline(struct cgroup *cont)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
+       mem_cgroup_invalidate_reclaim_iterators(memcg);
        mem_cgroup_reparent_charges(memcg);
        mem_cgroup_destroy_all_caches(memcg);
 }