mm: thp: fix mmu_notifier in migrate_misplaced_transhuge_page()

author Andrea Arcangeli <aarcange@redhat.com>

Fri, 26 Oct 2018 22:10:40 +0000 (15:10 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 26 Oct 2018 23:38:15 +0000 (16:38 -0700)
author Andrea Arcangeli <aarcange@redhat.com>
Fri, 26 Oct 2018 22:10:40 +0000 (15:10 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Oct 2018 23:38:15 +0000 (16:38 -0700)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 25c7d75..25ef59b 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1558,8 +1558,20 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
          * We are not sure a pending tlb flush here is for a huge page
          * mapping or not. Hence use the tlb range variant
          */
-       if (mm_tlb_flush_pending(vma->vm_mm))
+       if (mm_tlb_flush_pending(vma->vm_mm)) {
                 flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+               /*
+                * change_huge_pmd() released the pmd lock before
+                * invalidating the secondary MMUs sharing the primary
+                * MMU pagetables (with ->invalidate_range()). The
+                * mmu_notifier_invalidate_range_end() (which
+                * internally calls ->invalidate_range()) in
+                * change_pmd_range() will run after us, so we can't
+                * rely on it here and we need an explicit invalidate.
+                */
+               mmu_notifier_invalidate_range(vma->vm_mm, haddr,
+                                             haddr + HPAGE_PMD_SIZE);
+       }
  
         /*
          * Migrate the THP to the requested node, returns with page unlocked
diff --git a/mm/migrate.c b/mm/migrate.c

index 93d9a1e..905c226 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1975,8 +1975,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
         int isolated = 0;
         struct page *new_page = NULL;
         int page_lru = page_is_file_cache(page);
-       unsigned long mmun_start = address & HPAGE_PMD_MASK;
-       unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+       unsigned long start = address & HPAGE_PMD_MASK;
+       unsigned long end = start + HPAGE_PMD_SIZE;
  
         new_page = alloc_pages_node(node,
                 (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
@@ -2003,11 +2003,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
         WARN_ON(PageLRU(new_page));
  
         /* Recheck the target PMD */
-       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
         ptl = pmd_lock(mm, pmd);
         if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
                 spin_unlock(ptl);
-               mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
  
                 /* Reverse changes made by migrate_page_copy() */
                 if (TestClearPageActive(new_page))
@@ -2038,8 +2036,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
          * new page and page_add_new_anon_rmap guarantee the copy is
          * visible before the pagetable update.
          */
-       flush_cache_range(vma, mmun_start, mmun_end);
-       page_add_anon_rmap(new_page, vma, mmun_start, true);
+       flush_cache_range(vma, start, end);
+       page_add_anon_rmap(new_page, vma, start, true);
         /*
          * At this point the pmd is numa/protnone (i.e. non present) and the TLB
          * has already been flushed globally.  So no TLB can be currently
@@ -2051,7 +2049,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
          * MADV_DONTNEED won't wait on the pmd lock and it'll skip clearing this
          * pmd.
          */
-       set_pmd_at(mm, mmun_start, pmd, entry);
+       set_pmd_at(mm, start, pmd, entry);
         update_mmu_cache_pmd(vma, address, &entry);
  
         page_ref_unfreeze(page, 2);
@@ -2060,11 +2058,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
         set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
  
         spin_unlock(ptl);
-       /*
-        * No need to double call mmu_notifier->invalidate_range() callback as
-        * the above pmdp_huge_clear_flush_notify() did already call it.
-        */
-       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
  
         /* Take an "isolate" reference and put new page on the LRU. */
         get_page(new_page);
@@ -2088,7 +2081,7 @@ out_fail:
         ptl = pmd_lock(mm, pmd);
         if (pmd_same(*pmd, entry)) {
                 entry = pmd_modify(entry, vma->vm_page_prot);
-               set_pmd_at(mm, mmun_start, pmd, entry);
+               set_pmd_at(mm, start, pmd, entry);
                 update_mmu_cache_pmd(vma, address, &entry);
         }
         spin_unlock(ptl);
author	Andrea Arcangeli <aarcange@redhat.com>
	Fri, 26 Oct 2018 22:10:40 +0000 (15:10 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 26 Oct 2018 23:38:15 +0000 (16:38 -0700)
mm/huge_memory.c		patch | blob | history
mm/migrate.c		patch | blob | history