
Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penber...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 28 Mar 2012 22:04:26 +0000 (15:04 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 28 Mar 2012 22:04:26 +0000 (15:04 -0700)
Pull SLAB changes from Pekka Enberg:
 "There's the new kmalloc_array() API, minor fixes and performance
  improvements, but quite honestly, nothing terribly exciting."

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
  mm: SLAB Out-of-memory diagnostics
  slab: introduce kmalloc_array()
  slub: per cpu partial statistics change
  slub: include include for prefetch
  slub: Do not hold slub_lock when calling sysfs_slab_add()
  slub: prefetch next freelist pointer in slab_alloc()
  slab, cleanup: remove unneeded return

include/linux/slub_def.h
mm/slab.c
mm/slub.c
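
As a usage note on the kmalloc_array() helper named in the pull message and shortlog above: it is an overflow-checked variant of kmalloc() for array allocations, returning NULL instead of silently wrapping when n * size would overflow. The sketch below is illustrative only; struct foo and alloc_foo_array() are hypothetical names and are not part of this merge.

#include <linux/slab.h>

struct foo {
	int val;
};

/*
 * Allocate an array of nr elements. kmalloc_array(n, size, flags)
 * fails with NULL if n * size would overflow, a case that a bare
 * kmalloc(nr * sizeof(*arr), GFP_KERNEL) would not catch.
 */
static struct foo *alloc_foo_array(size_t nr)
{
	struct foo *arr;

	arr = kmalloc_array(nr, sizeof(*arr), GFP_KERNEL);
	if (!arr)
		return NULL;

	return arr;
}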

diff --combined include/linux/slub_def.h
@@@ -8,7 -8,6 +8,7 @@@
   */
  #include <linux/types.h>
  #include <linux/gfp.h>
 +#include <linux/bug.h>
  #include <linux/workqueue.h>
  #include <linux/kobject.h>
  
@@@ -22,7 -21,7 +22,7 @@@ enum stat_item 
        FREE_FROZEN,            /* Freeing to frozen slab */
        FREE_ADD_PARTIAL,       /* Freeing moves slab to partial list */
        FREE_REMOVE_PARTIAL,    /* Freeing removes last object */
-       ALLOC_FROM_PARTIAL,     /* Cpu slab acquired from partial list */
+       ALLOC_FROM_PARTIAL,     /* Cpu slab acquired from node partial list */
        ALLOC_SLAB,             /* Cpu slab acquired from page allocator */
        ALLOC_REFILL,           /* Refill cpu slab from slab freelist */
        ALLOC_NODE_MISMATCH,    /* Switching cpu slab */
@@@ -38,7 -37,9 +38,9 @@@
        CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
        CMPXCHG_DOUBLE_FAIL,    /* Number of times that cmpxchg double did not match */
        CPU_PARTIAL_ALLOC,      /* Used cpu partial on alloc */
-       CPU_PARTIAL_FREE,       /* USed cpu partial on free */
+       CPU_PARTIAL_FREE,       /* Refill cpu partial on free */
+       CPU_PARTIAL_NODE,       /* Refill cpu partial from node partial */
+       CPU_PARTIAL_DRAIN,      /* Drain cpu partial to node partial */
        NR_SLUB_STAT_ITEMS };
  
  struct kmem_cache_cpu {
diff --combined mm/slab.c
+++ b/mm/slab.c
@@@ -1731,6 -1731,52 +1731,52 @@@ static int __init cpucache_init(void
  }
  __initcall(cpucache_init);
  
+ static noinline void
+ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
+ {
+       struct kmem_list3 *l3;
+       struct slab *slabp;
+       unsigned long flags;
+       int node;
+       printk(KERN_WARNING
+               "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+               nodeid, gfpflags);
+       printk(KERN_WARNING "  cache: %s, object size: %d, order: %d\n",
+               cachep->name, cachep->buffer_size, cachep->gfporder);
+       for_each_online_node(node) {
+               unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
+               unsigned long active_slabs = 0, num_slabs = 0;
+               l3 = cachep->nodelists[node];
+               if (!l3)
+                       continue;
+               spin_lock_irqsave(&l3->list_lock, flags);
+               list_for_each_entry(slabp, &l3->slabs_full, list) {
+                       active_objs += cachep->num;
+                       active_slabs++;
+               }
+               list_for_each_entry(slabp, &l3->slabs_partial, list) {
+                       active_objs += slabp->inuse;
+                       active_slabs++;
+               }
+               list_for_each_entry(slabp, &l3->slabs_free, list)
+                       num_slabs++;
+               free_objects += l3->free_objects;
+               spin_unlock_irqrestore(&l3->list_lock, flags);
+               num_slabs += active_slabs;
+               num_objs = num_slabs * cachep->num;
+               printk(KERN_WARNING
+                       "  node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
+                       node, active_slabs, num_slabs, active_objs, num_objs,
+                       free_objects);
+       }
+ }
+
  /*
   * Interface to system's page allocator. No need to hold the cache-lock.
   *
@@@ -1757,8 -1803,11 +1803,11 @@@ static void *kmem_getpages(struct kmem_
                flags |= __GFP_RECLAIMABLE;
  
        page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
-       if (!page)
+       if (!page) {
+               if (!(flags & __GFP_NOWARN) && printk_ratelimit())
+                       slab_out_of_memory(cachep, flags, nodeid);
                return NULL;
+       }
  
        nr_pages = (1 << cachep->gfporder);
        if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@@ -3284,10 -3333,12 +3333,10 @@@ static void *alternate_node_alloc(struc
        if (in_interrupt() || (flags & __GFP_THISNODE))
                return NULL;
        nid_alloc = nid_here = numa_mem_id();
 -      get_mems_allowed();
        if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
                nid_alloc = cpuset_slab_spread_node();
        else if (current->mempolicy)
                nid_alloc = slab_node(current->mempolicy);
 -      put_mems_allowed();
        if (nid_alloc != nid_here)
                return ____cache_alloc_node(cachep, flags, nid_alloc);
        return NULL;
@@@ -3310,17 -3361,14 +3359,17 @@@ static void *fallback_alloc(struct kmem
        enum zone_type high_zoneidx = gfp_zone(flags);
        void *obj = NULL;
        int nid;
 +      unsigned int cpuset_mems_cookie;
  
        if (flags & __GFP_THISNODE)
                return NULL;
  
 -      get_mems_allowed();
 -      zonelist = node_zonelist(slab_node(current->mempolicy), flags);
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
  
 +retry_cpuset:
 +      cpuset_mems_cookie = get_mems_allowed();
 +      zonelist = node_zonelist(slab_node(current->mempolicy), flags);
 +
  retry:
        /*
         * Look through allowed nodes for objects available
                        }
                }
        }
 -      put_mems_allowed();
 +
 +      if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
 +              goto retry_cpuset;
        return obj;
  }
  
@@@ -3696,13 -3742,12 +3745,12 @@@ static inline void __cache_free(struct 
  
        if (likely(ac->avail < ac->limit)) {
                STATS_INC_FREEHIT(cachep);
-               ac->entry[ac->avail++] = objp;
-               return;
        } else {
                STATS_INC_FREEMISS(cachep);
                cache_flusharray(cachep, ac);
-               ac->entry[ac->avail++] = objp;
        }
+       ac->entry[ac->avail++] = objp;
  }
  
  /**
diff --combined mm/slub.c
+++ b/mm/slub.c
@@@ -29,6 -29,7 +29,7 @@@
  #include <linux/math64.h>
  #include <linux/fault-inject.h>
  #include <linux/stacktrace.h>
+ #include <linux/prefetch.h>
  
  #include <trace/events/kmem.h>
  
@@@ -269,6 -270,11 +270,11 @@@ static inline void *get_freepointer(str
        return *(void **)(object + s->offset);
  }
  
+ static void prefetch_freepointer(const struct kmem_cache *s, void *object)
+ {
+       prefetch(object + s->offset);
+ }
+
  static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
  {
        void *p;
@@@ -1560,6 -1566,7 +1566,7 @@@ static void *get_partial_node(struct km
                } else {
                        page->freelist = t;
                        available = put_cpu_partial(s, page, 0);
+                       stat(s, CPU_PARTIAL_NODE);
                }
                if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
                        break;
@@@ -1581,7 -1588,6 +1588,7 @@@ static struct page *get_any_partial(str
        struct zone *zone;
        enum zone_type high_zoneidx = gfp_zone(flags);
        void *object;
 +      unsigned int cpuset_mems_cookie;
  
        /*
         * The defrag ratio allows a configuration of the tradeoffs between
                        get_cycles() % 1024 > s->remote_node_defrag_ratio)
                return NULL;
  
 -      get_mems_allowed();
 -      zonelist = node_zonelist(slab_node(current->mempolicy), flags);
 -      for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 -              struct kmem_cache_node *n;
 -
 -              n = get_node(s, zone_to_nid(zone));
 -
 -              if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
 -                              n->nr_partial > s->min_partial) {
 -                      object = get_partial_node(s, n, c);
 -                      if (object) {
 -                              put_mems_allowed();
 -                              return object;
 +      do {
 +              cpuset_mems_cookie = get_mems_allowed();
 +              zonelist = node_zonelist(slab_node(current->mempolicy), flags);
 +              for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 +                      struct kmem_cache_node *n;
 +
 +                      n = get_node(s, zone_to_nid(zone));
 +
 +                      if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
 +                                      n->nr_partial > s->min_partial) {
 +                              object = get_partial_node(s, n, c);
 +                              if (object) {
 +                                      /*
 +                                       * Return the object even if
 +                                       * put_mems_allowed indicated that
 +                                       * the cpuset mems_allowed was
 +                                       * updated in parallel. It's a
 +                                       * harmless race between the alloc
 +                                       * and the cpuset update.
 +                                       */
 +                                      put_mems_allowed(cpuset_mems_cookie);
 +                                      return object;
 +                              }
                        }
                }
 -      }
 -      put_mems_allowed();
 +      } while (!put_mems_allowed(cpuset_mems_cookie));
  #endif
        return NULL;
  }
@@@ -1983,6 -1980,7 +1990,7 @@@ int put_cpu_partial(struct kmem_cache *
                                local_irq_restore(flags);
                                pobjects = 0;
                                pages = 0;
+                               stat(s, CPU_PARTIAL_DRAIN);
                        }
                }
  
                page->next = oldpage;
  
        } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
-       stat(s, CPU_PARTIAL_FREE);
        return pobjects;
  }
  
@@@ -2319,6 -2316,8 +2326,8 @@@ redo
                object = __slab_alloc(s, gfpflags, node, addr, c);
  
        else {
+               void *next_object = get_freepointer_safe(s, object);
                /*
                 * The cmpxchg will only match if there was no additional
                 * operation and if we are on the right processor.
                if (unlikely(!this_cpu_cmpxchg_double(
                                s->cpu_slab->freelist, s->cpu_slab->tid,
                                object, tid,
-                               get_freepointer_safe(s, object), next_tid(tid)))) {
+                               next_object, next_tid(tid)))) {
  
                        note_cmpxchg_failure("slab_alloc", s, tid);
                        goto redo;
                }
+               prefetch_freepointer(s, next_object);
                stat(s, ALLOC_FASTPATH);
        }
  
@@@ -2475,9 -2475,10 +2485,10 @@@ static void __slab_free(struct kmem_cac
                 * If we just froze the page then put it onto the
                 * per cpu partial list.
                 */
-               if (new.frozen && !was_frozen)
+               if (new.frozen && !was_frozen) {
                        put_cpu_partial(s, page, 1);
+                       stat(s, CPU_PARTIAL_FREE);
+               }
                /*
                 * The list lock was not taken therefore no list
                 * activity can be necessary.
@@@ -3939,13 -3940,14 +3950,14 @@@ struct kmem_cache *kmem_cache_create(co
                if (kmem_cache_open(s, n,
                                size, align, flags, ctor)) {
                        list_add(&s->list, &slab_caches);
+                       up_write(&slub_lock);
                        if (sysfs_slab_add(s)) {
+                               down_write(&slub_lock);
                                list_del(&s->list);
                                kfree(n);
                                kfree(s);
                                goto err;
                        }
-                       up_write(&slub_lock);
                        return s;
                }
                kfree(n);
@@@ -5069,6 -5071,8 +5081,8 @@@ STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpx
  STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
  STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
  STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
+ STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
+ STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
  #endif
  
  static struct attribute *slab_attrs[] = {
        &cmpxchg_double_cpu_fail_attr.attr,
        &cpu_partial_alloc_attr.attr,
        &cpu_partial_free_attr.attr,
+       &cpu_partial_node_attr.attr,
+       &cpu_partial_drain_attr.attr,
  #endif
  #ifdef CONFIG_FAILSLAB
        &failslab_attr.attr,