diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 9445bee..702bc3f 100644
  * have room for two bits at least.
  */
 #define OBJ_ALLOCATED_TAG 1
-#define OBJ_TAG_BITS 1
+
+#ifdef CONFIG_ZPOOL
+/*
+ * The second least-significant bit in the object's header identifies whether
+ * the value stored in the header is a deferred handle from the last reclaim
+ * attempt.
+ *
+ * As noted above, this is valid because we have room for two bits.
+ */
+#define OBJ_DEFERRED_HANDLE_TAG        2
+#define OBJ_TAG_BITS   2
+#define OBJ_TAG_MASK   (OBJ_ALLOCATED_TAG | OBJ_DEFERRED_HANDLE_TAG)
+#else
+#define OBJ_TAG_BITS   1
+#define OBJ_TAG_MASK   OBJ_ALLOCATED_TAG
+#endif /* CONFIG_ZPOOL */
+
 #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
 #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
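
The new tag layout above can be exercised in isolation. The following user-space sketch mirrors the two tag bits and the mask (the DEMO_* names and the driver are illustrative, not kernel code); it assumes handles are word-aligned addresses, so their two low bits are free to hold tags, as the comment above notes.

#include <assert.h>
#include <stdio.h>

#define DEMO_ALLOCATED_TAG		1UL
#define DEMO_DEFERRED_HANDLE_TAG	2UL
#define DEMO_TAG_MASK	(DEMO_ALLOCATED_TAG | DEMO_DEFERRED_HANDLE_TAG)

int main(void)
{
	/* A handle is a word-aligned address, so its two low bits are 0. */
	static unsigned long backing;
	unsigned long handle = (unsigned long)&backing;

	assert((handle & DEMO_TAG_MASK) == 0);

	/* A live object's header stores the handle tagged as allocated. */
	unsigned long header = handle | DEMO_ALLOCATED_TAG;

	/* After a failed reclaim the same word holds the handle tagged
	 * as deferred instead. */
	unsigned long deferred = handle | DEMO_DEFERRED_HANDLE_TAG;

	/* Readers strip every tag bit before using the value, as
	 * obj_tagged() does with OBJ_TAG_MASK. */
	printf("allocated=%d deferred=%d handle=%#lx\n",
	       !!(header & DEMO_ALLOCATED_TAG),
	       !!(deferred & DEMO_DEFERRED_HANDLE_TAG),
	       header & ~DEMO_TAG_MASK);
	return 0;
}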
 
@@ -222,6 +238,12 @@ struct link_free {
                 * Handle of allocated object.
                 */
                unsigned long handle;
+#ifdef CONFIG_ZPOOL
+               /*
+                * Deferred handle of a reclaimed object.
+                */
+               unsigned long deferred_handle;
+#endif
        };
 };
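
With the added deferred_handle member, the same header word in link_free can hold one of three things: the next free object's index shifted past the tag bits, a live handle tagged OBJ_ALLOCATED_TAG, or a deferred handle tagged OBJ_DEFERRED_HANDLE_TAG. A minimal user-space sketch of that decoding (illustrative constants and sample addresses, not kernel code):

#include <stdio.h>

#define TAG_BITS	2
#define ALLOCATED	1UL
#define DEFERRED	2UL
#define TAG_MASK	(ALLOCATED | DEFERRED)

/* Decode one header word the way zsmalloc interprets link_free: a free
 * slot stores the next free object's index shifted past the tag bits,
 * an in-use slot stores a tagged handle. */
static void decode(unsigned long word)
{
	if (word & ALLOCATED)
		printf("allocated, handle = %#lx\n", word & ~TAG_MASK);
	else if (word & DEFERRED)
		printf("reclaimed, deferred handle = %#lx\n", word & ~TAG_MASK);
	else
		printf("free, next free index = %lu\n", word >> TAG_BITS);
}

int main(void)
{
	decode(7UL << TAG_BITS);		/* free slot, next free object is 7 */
	decode(0x100040UL | ALLOCATED);		/* live object                      */
	decode(0x100080UL | DEFERRED);		/* reclaimed, handle deferred       */
	return 0;
}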
 
@@ -272,8 +294,6 @@ struct zspage {
        /* links the zspage to the lru list in the pool */
        struct list_head lru;
        bool under_reclaim;
-       /* list of unfreed handles whose objects have been reclaimed */
-       unsigned long *deferred_handles;
 #endif
 
        struct zs_pool *pool;
@@ -897,7 +917,8 @@ static unsigned long handle_to_obj(unsigned long handle)
        return *(unsigned long *)handle;
 }
 
-static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
+static bool obj_tagged(struct page *page, void *obj, unsigned long *phandle,
+               int tag)
 {
        unsigned long handle;
        struct zspage *zspage = get_zspage(page);
@@ -908,13 +929,27 @@ static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
        } else
                handle = *(unsigned long *)obj;
 
-       if (!(handle & OBJ_ALLOCATED_TAG))
+       if (!(handle & tag))
                return false;
 
-       *phandle = handle & ~OBJ_ALLOCATED_TAG;
+       /* Clear all tags before returning the handle */
+       *phandle = handle & ~OBJ_TAG_MASK;
        return true;
 }
 
+static inline bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
+{
+       return obj_tagged(page, obj, phandle, OBJ_ALLOCATED_TAG);
+}
+
+#ifdef CONFIG_ZPOOL
+static bool obj_stores_deferred_handle(struct page *page, void *obj,
+               unsigned long *phandle)
+{
+       return obj_tagged(page, obj, phandle, OBJ_DEFERRED_HANDLE_TAG);
+}
+#endif
+
 static void reset_page(struct page *page)
 {
        __ClearPageMovable(page);
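
obj_tagged() above generalizes the old obj_allocated(): it tests one tag but clears the whole OBJ_TAG_MASK before returning the handle, and thin wrappers select which tag to test. A user-space sketch of the same pattern (hypothetical word_* helpers, not the kernel functions):

#include <stdbool.h>
#include <stdio.h>

#define ALLOCATED	1UL
#define DEFERRED	2UL
#define TAG_MASK	(ALLOCATED | DEFERRED)

/* Test one tag, but clear every tag bit before handing the value back,
 * just as obj_tagged() clears OBJ_TAG_MASK. */
static bool word_tagged(unsigned long word, unsigned long tag,
			unsigned long *value)
{
	if (!(word & tag))
		return false;
	*value = word & ~TAG_MASK;
	return true;
}

static bool word_allocated(unsigned long word, unsigned long *value)
{
	return word_tagged(word, ALLOCATED, value);
}

static bool word_deferred(unsigned long word, unsigned long *value)
{
	return word_tagged(word, DEFERRED, value);
}

int main(void)
{
	unsigned long value;

	if (word_allocated(0x1000UL | ALLOCATED, &value))
		printf("allocated handle %#lx\n", value);
	if (word_deferred(0x2000UL | DEFERRED, &value))
		printf("deferred handle %#lx\n", value);
	return 0;
}
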
@@ -946,22 +981,36 @@ unlock:
 }
 
 #ifdef CONFIG_ZPOOL
+static unsigned long find_deferred_handle_obj(struct size_class *class,
+               struct page *page, int *obj_idx);
+
 /*
  * Free all the deferred handles whose objects are freed in zs_free.
  */
-static void free_handles(struct zs_pool *pool, struct zspage *zspage)
+static void free_handles(struct zs_pool *pool, struct size_class *class,
+               struct zspage *zspage)
 {
-       unsigned long handle = (unsigned long)zspage->deferred_handles;
+       int obj_idx = 0;
+       struct page *page = get_first_page(zspage);
+       unsigned long handle;
 
-       while (handle) {
-               unsigned long nxt_handle = handle_to_obj(handle);
+       while (1) {
+               handle = find_deferred_handle_obj(class, page, &obj_idx);
+               if (!handle) {
+                       page = get_next_page(page);
+                       if (!page)
+                               break;
+                       obj_idx = 0;
+                       continue;
+               }
 
                cache_free_handle(pool, handle);
-               handle = nxt_handle;
+               obj_idx++;
        }
 }
 #else
-static inline void free_handles(struct zs_pool *pool, struct zspage *zspage) {}
+static inline void free_handles(struct zs_pool *pool, struct size_class *class,
+               struct zspage *zspage) {}
 #endif
 
 static void __free_zspage(struct zs_pool *pool, struct size_class *class,
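
free_handles() above no longer walks a chain rooted at zspage->deferred_handles; it scans every object slot of every page in the zspage and frees whichever deferred handles it finds. A rough user-space model of that scan, with a small array standing in for the zspage's pages (all names hypothetical):

#include <stdio.h>

#define DEFERRED	2UL
#define TAG_MASK	3UL
#define OBJS_PER_PAGE	4
#define NR_PAGES	2

/* Role of find_deferred_handle_obj(): return the untagged deferred
 * handle at or after *obj_idx in one "page", or 0 if none is left. */
static unsigned long find_deferred(unsigned long *page, int *obj_idx)
{
	for (; *obj_idx < OBJS_PER_PAGE; (*obj_idx)++) {
		if (page[*obj_idx] & DEFERRED)
			return page[*obj_idx] & ~TAG_MASK;
	}
	return 0;
}

int main(void)
{
	/* Two fake pages; one slot in each holds a deferred handle. */
	unsigned long pages[NR_PAGES][OBJS_PER_PAGE] = {
		{ 0, 0x1000 | DEFERRED, 0, 0 },
		{ 0, 0x2000 | DEFERRED, 0, 0 },
	};
	int pg = 0, obj_idx = 0;

	while (pg < NR_PAGES) {
		unsigned long handle = find_deferred(pages[pg], &obj_idx);

		if (!handle) {			/* nothing left on this page */
			pg++;
			obj_idx = 0;
			continue;
		}
		printf("would cache_free_handle(%#lx)\n", handle);
		obj_idx++;
	}
	return 0;
}
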
@@ -979,7 +1028,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
        VM_BUG_ON(fg != ZS_EMPTY);
 
        /* Free all deferred handles from zs_free */
-       free_handles(pool, zspage);
+       free_handles(pool, class, zspage);
 
        next = page = get_first_page(zspage);
        do {
@@ -1067,7 +1116,6 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
 #ifdef CONFIG_ZPOOL
        INIT_LIST_HEAD(&zspage->lru);
        zspage->under_reclaim = false;
-       zspage->deferred_handles = NULL;
 #endif
 
        set_freeobj(zspage, 0);
@@ -1568,7 +1616,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(zs_malloc);
 
-static void obj_free(int class_size, unsigned long obj)
+static void obj_free(int class_size, unsigned long obj, unsigned long *handle)
 {
        struct link_free *link;
        struct zspage *zspage;
@@ -1582,15 +1630,29 @@ static void obj_free(int class_size, unsigned long obj)
        zspage = get_zspage(f_page);
 
        vaddr = kmap_atomic(f_page);
-
-       /* Insert this object in containing zspage's freelist */
        link = (struct link_free *)(vaddr + f_offset);
-       if (likely(!ZsHugePage(zspage)))
-               link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
-       else
-               f_page->index = 0;
+
+       if (handle) {
+#ifdef CONFIG_ZPOOL
+               /* Stores the (deferred) handle in the object's header */
+               *handle |= OBJ_DEFERRED_HANDLE_TAG;
+               *handle &= ~OBJ_ALLOCATED_TAG;
+
+               if (likely(!ZsHugePage(zspage)))
+                       link->deferred_handle = *handle;
+               else
+                       f_page->index = *handle;
+#endif
+       } else {
+               /* Insert this object in containing zspage's freelist */
+               if (likely(!ZsHugePage(zspage)))
+                       link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
+               else
+                       f_page->index = 0;
+               set_freeobj(zspage, f_objidx);
+       }
+
        kunmap_atomic(vaddr);
-       set_freeobj(zspage, f_objidx);
        mod_zspage_inuse(zspage, -1);
 }
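
The reworked obj_free() above takes an optional handle: with one, the object's header keeps the handle re-tagged as deferred; without one, the slot is threaded back onto the zspage freelist. A compact user-space sketch of that branch (fake_obj_free, the global freeobj, and the sample values are illustrative only):

#include <stdio.h>

#define TAG_BITS	2
#define ALLOCATED	1UL
#define DEFERRED	2UL

static unsigned long freeobj;	/* head of the fake freelist (object index) */

/* Model of the reworked obj_free(): with a handle, the slot keeps the
 * handle re-tagged as deferred; without one, the slot is threaded back
 * onto the freelist. */
static void fake_obj_free(unsigned long *slot, unsigned long obj_idx,
			  unsigned long *handle)
{
	if (handle) {
		*handle |= DEFERRED;
		*handle &= ~ALLOCATED;
		*slot = *handle;		/* header keeps the deferred handle */
	} else {
		*slot = freeobj << TAG_BITS;	/* link to the previous head */
		freeobj = obj_idx;		/* this slot is the new head */
	}
}

int main(void)
{
	unsigned long slots[2] = { 0x1000 | ALLOCATED, 0x2000 | ALLOCATED };
	unsigned long handle = 0x1000 | ALLOCATED;

	fake_obj_free(&slots[0], 0, &handle);	/* zspage under reclaim */
	fake_obj_free(&slots[1], 1, NULL);	/* normal free */

	printf("slot0=%#lx (deferred) slot1=%#lx (freelist) head=%lu\n",
	       slots[0], slots[1], freeobj);
	return 0;
}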
 
@@ -1615,7 +1677,6 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
        zspage = get_zspage(f_page);
        class = zspage_class(pool, zspage);
 
-       obj_free(class->size, obj);
        class_stat_dec(class, OBJ_USED, 1);
 
 #ifdef CONFIG_ZPOOL
@@ -1624,15 +1685,15 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
                 * Reclaim needs the handles during writeback. It'll free
                 * them along with the zspage when it's done with them.
                 *
-                * Record current deferred handle at the memory location
-                * whose address is given by handle.
+                * Record current deferred handle in the object's header.
                 */
-               record_obj(handle, (unsigned long)zspage->deferred_handles);
-               zspage->deferred_handles = (unsigned long *)handle;
+               obj_free(class->size, obj, &handle);
                spin_unlock(&pool->lock);
                return;
        }
 #endif
+       obj_free(class->size, obj, NULL);
+
        fullness = fix_fullness_group(class, zspage);
        if (fullness == ZS_EMPTY)
                free_zspage(pool, class, zspage);
@@ -1713,11 +1774,11 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
 }
 
 /*
- * Find alloced object in zspage from index object and
+ * Find the first object carrying the given tag in zspage, starting at *obj_idx, and
  * return handle.
  */
-static unsigned long find_alloced_obj(struct size_class *class,
-                                       struct page *page, int *obj_idx)
+static unsigned long find_tagged_obj(struct size_class *class,
+                                       struct page *page, int *obj_idx, int tag)
 {
        unsigned int offset;
        int index = *obj_idx;
@@ -1728,7 +1789,7 @@ static unsigned long find_alloced_obj(struct size_class *class,
        offset += class->size * index;
 
        while (offset < PAGE_SIZE) {
-               if (obj_allocated(page, addr + offset, &handle))
+               if (obj_tagged(page, addr + offset, &handle, tag))
                        break;
 
                offset += class->size;
@@ -1742,6 +1803,28 @@ static unsigned long find_alloced_obj(struct size_class *class,
        return handle;
 }
 
+/*
+ * Find the first allocated object in zspage, starting at *obj_idx, and
+ * return its handle.
+ */
+static unsigned long find_alloced_obj(struct size_class *class,
+                                       struct page *page, int *obj_idx)
+{
+       return find_tagged_obj(class, page, obj_idx, OBJ_ALLOCATED_TAG);
+}
+
+#ifdef CONFIG_ZPOOL
+/*
+ * Find the first object whose header stores a deferred handle in zspage,
+ * starting at *obj_idx, and return that handle.
+ */
+static unsigned long find_deferred_handle_obj(struct size_class *class,
+               struct page *page, int *obj_idx)
+{
+       return find_tagged_obj(class, page, obj_idx, OBJ_DEFERRED_HANDLE_TAG);
+}
+#endif
+
 struct zs_compact_control {
        /* Source spage for migration which could be a subpage of zspage */
        struct page *s_page;
@@ -1784,7 +1867,7 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
                zs_object_copy(class, free_obj, used_obj);
                obj_idx++;
                record_obj(handle, free_obj);
-               obj_free(class->size, used_obj);
+               obj_free(class->size, used_obj, NULL);
        }
 
        /* Remember last position in this iteration */
@@ -2478,6 +2561,90 @@ void zs_destroy_pool(struct zs_pool *pool)
 EXPORT_SYMBOL_GPL(zs_destroy_pool);
 
 #ifdef CONFIG_ZPOOL
+static void restore_freelist(struct zs_pool *pool, struct size_class *class,
+               struct zspage *zspage)
+{
+       unsigned int obj_idx = 0;
+       unsigned long handle, off = 0; /* off is within-page offset */
+       struct page *page = get_first_page(zspage);
+       struct link_free *prev_free = NULL;
+       void *prev_page_vaddr = NULL;
+
+       /* in case no free object found */
+       set_freeobj(zspage, (unsigned int)(-1UL));
+
+       while (page) {
+               void *vaddr = kmap_atomic(page);
+               struct page *next_page;
+
+               while (off < PAGE_SIZE) {
+                       void *obj_addr = vaddr + off;
+
+                       /* skip allocated object */
+                       if (obj_allocated(page, obj_addr, &handle)) {
+                               obj_idx++;
+                               off += class->size;
+                               continue;
+                       }
+
+                       /* free deferred handle from reclaim attempt */
+                       if (obj_stores_deferred_handle(page, obj_addr, &handle))
+                               cache_free_handle(pool, handle);
+
+                       if (prev_free)
+                               prev_free->next = obj_idx << OBJ_TAG_BITS;
+                       else /* first free object found */
+                               set_freeobj(zspage, obj_idx);
+
+                       prev_free = (struct link_free *)vaddr + off / sizeof(*prev_free);
+                       /* if last free object in a previous page, need to unmap */
+                       if (prev_page_vaddr) {
+                               kunmap_atomic(prev_page_vaddr);
+                               prev_page_vaddr = NULL;
+                       }
+
+                       obj_idx++;
+                       off += class->size;
+               }
+
+               /*
+                * Handle the last (full or partial) object on this page.
+                */
+               next_page = get_next_page(page);
+               if (next_page) {
+                       if (!prev_free || prev_page_vaddr) {
+                               /*
+                                * There is no free object in this page, so we can safely
+                                * unmap it.
+                                */
+                               kunmap_atomic(vaddr);
+                       } else {
+                               /* update prev_page_vaddr since prev_free is on this page */
+                               prev_page_vaddr = vaddr;
+                       }
+               } else { /* this is the last page */
+                       if (prev_free) {
+                               /*
+                                * Terminate the freelist: keep the tag bits of
+                                * the last link clear so the slot is not
+                                * mistaken for an allocated object.
+                                */
+                               prev_free->next = -1UL << OBJ_TAG_BITS;
+                       }
+
+                       /* unmap previous page (if not done yet) */
+                       if (prev_page_vaddr) {
+                               kunmap_atomic(prev_page_vaddr);
+                               prev_page_vaddr = NULL;
+                       }
+
+                       kunmap_atomic(vaddr);
+               }
+
+               page = next_page;
+               off %= PAGE_SIZE;
+       }
+}
+
 static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
 {
        int i, obj_idx, ret = 0;
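
restore_freelist() above rebuilds the zspage freelist after a failed reclaim: allocated objects are skipped, deferred handles are freed, each remaining slot is linked to the next free one via its index shifted past the tag bits, and the last link is terminated with -1UL << OBJ_TAG_BITS. The sketch below models that on a flat array in user space (it ignores the kmap/kunmap page handling the kernel needs; all names and values are illustrative):

#include <stdio.h>

#define TAG_BITS	2
#define ALLOCATED	1UL
#define DEFERRED	2UL
#define TAG_MASK	(ALLOCATED | DEFERRED)
#define NR_OBJS		6

int main(void)
{
	/* A fake zspage as a flat array of header words. */
	unsigned long slots[NR_OBJS] = {
		0x1000 | ALLOCATED,	/* still in use              */
		0,			/* free                       */
		0x2000 | DEFERRED,	/* reclaimed, handle pending  */
		0x3000 | ALLOCATED,	/* still in use               */
		0,			/* free                       */
		0,			/* free                       */
	};
	unsigned long freeobj = (unsigned int)-1;	/* "no free object" */
	long prev = -1;			/* index of the previous free slot */

	for (int i = 0; i < NR_OBJS; i++) {
		if (slots[i] & ALLOCATED)
			continue;			/* skip live objects */
		if (slots[i] & DEFERRED)		/* free stored handle */
			printf("free deferred handle %#lx\n",
			       slots[i] & ~TAG_MASK);

		if (prev >= 0)				/* thread previous slot */
			slots[prev] = (unsigned long)i << TAG_BITS;
		else					/* first free slot found */
			freeobj = i;
		prev = i;
	}
	if (prev >= 0)					/* terminate the list */
		slots[prev] = -1UL << TAG_BITS;

	printf("freelist head = %lu\n", freeobj);
	for (int i = 0; i < NR_OBJS; i++)
		printf("slot[%d] = %#lx\n", i, slots[i]);
	return 0;
}
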
@@ -2561,6 +2728,12 @@ next:
                        return 0;
                }
 
+               /*
+                * Eviction failed on one of the handles, so the zspage must be
+                * restored: rebuild its freelist (freeing any stored deferred
+                * handles), put it back into the correct size class, and re-add
+                * it to the LRU list.
+                */
+               restore_freelist(pool, class, zspage);
                putback_zspage(class, zspage);
                list_add(&zspage->lru, &pool->lru);
                unlock_zspage(zspage);