OSDN Git Service

mm/khugepaged: collapse_shmem() without freezing new_page
[android-x86/kernel.git] / mm / khugepaged.c
index a31d740..e2b13c0 100644 (file)
@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
- *  - allocate and freeze a new huge page;
+ *  - allocate and lock a new huge page;
  *  - scan over radix tree replacing old pages the new one
  *    + swap in pages if necessary;
  *    + fill in gaps;
@@ -1295,11 +1295,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *  - if replacing succeed:
  *    + copy data over;
  *    + free old pages;
- *    + unfreeze huge page;
+ *    + unlock huge page;
  *  - if replacing failed;
  *    + put all pages back and unfreeze them;
  *    + restore gaps in the radix-tree;
- *    + free huge page;
+ *    + unlock and free huge page;
  */
 static void collapse_shmem(struct mm_struct *mm,
                struct address_space *mapping, pgoff_t start,
@@ -1330,18 +1330,15 @@ static void collapse_shmem(struct mm_struct *mm,
                goto out;
        }
 
+       __SetPageLocked(new_page);
+       __SetPageSwapBacked(new_page);
        new_page->index = start;
        new_page->mapping = mapping;
-       __SetPageSwapBacked(new_page);
-       __SetPageLocked(new_page);
-       BUG_ON(!page_ref_freeze(new_page, 1));
-
 
        /*
-        * At this point the new_page is 'frozen' (page_count() is zero), locked
-        * and not up-to-date. It's safe to insert it into radix tree, because
-        * nobody would be able to map it or use it in other way until we
-        * unfreeze it.
+        * At this point the new_page is locked and not up-to-date.
+        * It's safe to insert it into the page cache, because nobody would
+        * be able to map it or use it in another way until we unlock it.
         */
 
        index = start;
@@ -1350,18 +1347,28 @@ static void collapse_shmem(struct mm_struct *mm,
                int n = min(iter.index, end) - index;
 
                /*
+                * Stop if extent has been hole-punched, and is now completely
+                * empty (the more obvious i_size_read() check would take an
+                * irq-unsafe seqlock on 32-bit).
+                */
+               if (n >= HPAGE_PMD_NR) {
+                       result = SCAN_TRUNCATED;
+                       goto tree_locked;
+               }
+
+               /*
                 * Handle holes in the radix tree: charge it from shmem and
                 * insert relevant subpage of new_page into the radix-tree.
                 */
                if (n && !shmem_charge(mapping->host, n)) {
                        result = SCAN_FAIL;
-                       break;
+                       goto tree_locked;
                }
-               nr_none += n;
                for (; index < min(iter.index, end); index++) {
                        radix_tree_insert(&mapping->i_pages, index,
                                        new_page + (index % HPAGE_PMD_NR));
                }
+               nr_none += n;
 
                /* We are done. */
                if (index >= end)
@@ -1377,12 +1384,12 @@ static void collapse_shmem(struct mm_struct *mm,
                                result = SCAN_FAIL;
                                goto tree_unlocked;
                        }
-                       xa_lock_irq(&mapping->i_pages);
                } else if (trylock_page(page)) {
                        get_page(page);
+                       xa_unlock_irq(&mapping->i_pages);
                } else {
                        result = SCAN_PAGE_LOCK;
-                       break;
+                       goto tree_locked;
                }
 
                /*
@@ -1397,11 +1404,10 @@ static void collapse_shmem(struct mm_struct *mm,
                        result = SCAN_TRUNCATED;
                        goto out_unlock;
                }
-               xa_unlock_irq(&mapping->i_pages);
 
                if (isolate_lru_page(page)) {
                        result = SCAN_DEL_PAGE_LRU;
-                       goto out_isolate_failed;
+                       goto out_unlock;
                }
 
                if (page_mapped(page))
@@ -1422,7 +1428,9 @@ static void collapse_shmem(struct mm_struct *mm,
                 */
                if (!page_ref_freeze(page, 3)) {
                        result = SCAN_PAGE_COUNT;
-                       goto out_lru;
+                       xa_unlock_irq(&mapping->i_pages);
+                       putback_lru_page(page);
+                       goto out_unlock;
                }
 
                /*
@@ -1438,17 +1446,10 @@ static void collapse_shmem(struct mm_struct *mm,
                slot = radix_tree_iter_resume(slot, &iter);
                index++;
                continue;
-out_lru:
-               xa_unlock_irq(&mapping->i_pages);
-               putback_lru_page(page);
-out_isolate_failed:
-               unlock_page(page);
-               put_page(page);
-               goto tree_unlocked;
 out_unlock:
                unlock_page(page);
                put_page(page);
-               break;
+               goto tree_unlocked;
        }
 
        /*
@@ -1456,14 +1457,18 @@ out_unlock:
         * This code only triggers if there's nothing in radix tree
         * beyond 'end'.
         */
-       if (result == SCAN_SUCCEED && index < end) {
+       if (index < end) {
                int n = end - index;
 
+               /* Stop if extent has been truncated, and is now empty */
+               if (n >= HPAGE_PMD_NR) {
+                       result = SCAN_TRUNCATED;
+                       goto tree_locked;
+               }
                if (!shmem_charge(mapping->host, n)) {
                        result = SCAN_FAIL;
                        goto tree_locked;
                }
-
                for (; index < end; index++) {
                        radix_tree_insert(&mapping->i_pages, index,
                                        new_page + (index % HPAGE_PMD_NR));
@@ -1471,59 +1476,64 @@ out_unlock:
                nr_none += n;
        }
 
+       __inc_node_page_state(new_page, NR_SHMEM_THPS);
+       if (nr_none) {
+               struct zone *zone = page_zone(new_page);
+
+               __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+               __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+       }
+
 tree_locked:
        xa_unlock_irq(&mapping->i_pages);
 tree_unlocked:
 
        if (result == SCAN_SUCCEED) {
-               unsigned long flags;
-               struct zone *zone = page_zone(new_page);
-
                /*
                 * Replacing old pages with new one has succeed, now we need to
                 * copy the content and free old pages.
                 */
+               index = start;
                list_for_each_entry_safe(page, tmp, &pagelist, lru) {
+                       while (index < page->index) {
+                               clear_highpage(new_page + (index % HPAGE_PMD_NR));
+                               index++;
+                       }
                        copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
                                        page);
                        list_del(&page->lru);
-                       unlock_page(page);
-                       page_ref_unfreeze(page, 1);
                        page->mapping = NULL;
+                       page_ref_unfreeze(page, 1);
                        ClearPageActive(page);
                        ClearPageUnevictable(page);
+                       unlock_page(page);
                        put_page(page);
+                       index++;
                }
-
-               local_irq_save(flags);
-               __inc_node_page_state(new_page, NR_SHMEM_THPS);
-               if (nr_none) {
-                       __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-                       __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+               while (index < end) {
+                       clear_highpage(new_page + (index % HPAGE_PMD_NR));
+                       index++;
                }
-               local_irq_restore(flags);
 
-               /*
-                * Remove pte page tables, so we can re-faulti
-                * the page as huge.
-                */
-               retract_page_tables(mapping, start);
-
-               /* Everything is ready, let's unfreeze the new_page */
-               set_page_dirty(new_page);
                SetPageUptodate(new_page);
-               page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+               page_ref_add(new_page, HPAGE_PMD_NR - 1);
+               set_page_dirty(new_page);
                mem_cgroup_commit_charge(new_page, memcg, false, true);
                lru_cache_add_anon(new_page);
-               unlock_page(new_page);
 
+               /*
+                * Remove pte page tables, so we can re-fault the page as huge.
+                */
+               retract_page_tables(mapping, start);
                *hpage = NULL;
 
                khugepaged_pages_collapsed++;
        } else {
                /* Something went wrong: rollback changes to the radix-tree */
-               shmem_uncharge(mapping->host, nr_none);
                xa_lock_irq(&mapping->i_pages);
+               mapping->nrpages -= nr_none;
+               shmem_uncharge(mapping->host, nr_none);
+
                radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
                        if (iter.index >= end)
                                break;
@@ -1546,19 +1556,18 @@ tree_unlocked:
                        radix_tree_replace_slot(&mapping->i_pages, slot, page);
                        slot = radix_tree_iter_resume(slot, &iter);
                        xa_unlock_irq(&mapping->i_pages);
-                       putback_lru_page(page);
                        unlock_page(page);
+                       putback_lru_page(page);
                        xa_lock_irq(&mapping->i_pages);
                }
                VM_BUG_ON(nr_none);
                xa_unlock_irq(&mapping->i_pages);
 
-               /* Unfreeze new_page, caller would take care about freeing it */
-               page_ref_unfreeze(new_page, 1);
                mem_cgroup_cancel_charge(new_page, memcg, true);
-               unlock_page(new_page);
                new_page->mapping = NULL;
        }
+
+       unlock_page(new_page);
 out:
        VM_BUG_ON(!list_empty(&pagelist));
        /* TODO: tracepoints */