
mm: streamline COW logic in do_swap_page()
author David Hildenbrand <david@redhat.com>
Fri, 25 Mar 2022 01:13:40 +0000 (18:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2022 02:06:50 +0000 (19:06 -0700)
Currently we have a different COW logic when:
* triggering a read-fault to swapin first and then trigger a write-fault
  -> do_swap_page() + do_wp_page()
* triggering a write-fault to swapin
  -> do_swap_page() + do_wp_page() only if we fail reuse in do_swap_page()
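
For orientation, a condensed call-flow sketch of the two paths above as they
behave before this patch (comment form only, not actual kernel code):

/*
 * Read fault on a swap PTE, later write fault on the mapped page:
 *   do_swap_page()  -> swaps the page in, maps it R/O
 *   do_wp_page()    -> decides reuse vs. COW copy
 *
 * Write fault directly on a swap PTE:
 *   do_swap_page()  -> decides reuse itself (via reuse_swap_page())
 *   do_wp_page()    -> runs only if that reuse attempt failed
 */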

The COW logic in do_swap_page() is different from our reuse logic in
do_wp_page().  The reuse logic in do_wp_page() -- page_count() == 1 --
currently makes sure that we don't have any remaining reference, e.g., via
GUP, on the target page we want to reuse: if there is any unexpected
reference, we have to copy instead to avoid an information leak.
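
As a minimal illustration of that rule -- with a hypothetical helper name,
and locking, KSM and swapcache details omitted -- the decision boils down to:

/*
 * Hypothetical helper, for illustration only: on a write fault, reuse an
 * anonymous page only if we hold the sole remaining reference.  Any extra
 * reference (e.g., a GUP pin) means someone else may still read the page,
 * so we must copy instead of reusing it.
 */
static bool can_reuse_exclusive_anon_page(struct page *page)
{
        return page_count(page) == 1;
}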

As do_swap_page() behaves differently, in environments with swap enabled
we can currently have an unintended information leak from the parent to
the child, similar to the one known from CVE-2020-29374:

1. Parent writes to anonymous page
-> Page is mapped writable and modified
2. Page is swapped out
-> Page is unmapped and replaced by swap entry
3. fork()
-> Swap entries are copied to child
4. Child pins page R/O
-> Page is mapped R/O into child
5. Child unmaps page
-> Child still holds GUP reference
6. Parent writes to page
-> Page is reused in do_swap_page()
-> Child can observe changes

Exchanging steps 2. and 3. should have the same effect.
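
For illustration only, a rough userspace sketch of the scenario above (not
part of the patch).  It assumes swap is enabled, that MADV_PAGEOUT actually
pushes the page out to swap, and that vmsplice() takes a GUP reference on the
spliced page; parent/child ordering is approximated with crude sleeps.  On an
unfixed kernel the child may observe the parent's 'B' instead of the 'A' it
pinned:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <sys/wait.h>

int main(void)
{
        size_t pagesize = sysconf(_SC_PAGESIZE);
        int fds[2];
        char *mem;

        /* 1. Parent writes to an anonymous page. */
        mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (mem == MAP_FAILED)
                return 1;
        memset(mem, 'A', pagesize);

        /* 2. Ask the kernel to swap the page out (best effort). */
        madvise(mem, pagesize, MADV_PAGEOUT);

        if (pipe(fds))
                return 1;

        /* 3. fork() copies the swap entry to the child. */
        if (fork() == 0) {
                struct iovec iov = { .iov_base = mem, .iov_len = pagesize };
                char buf;

                /* 4. Pin the page R/O: vmsplice() takes a GUP reference. */
                if (vmsplice(fds[1], &iov, 1, 0) != (ssize_t)pagesize)
                        exit(1);
                /* 5. Unmap; the pipe still holds the GUP reference. */
                munmap(mem, pagesize);

                sleep(1);       /* crude: wait for the parent's write fault */

                /* With the buggy reuse, the parent's 'B' shows up here. */
                if (read(fds[0], &buf, 1) != 1)
                        exit(1);
                printf("child sees '%c' (expected 'A')\n", buf);
                exit(0);
        }

        /* 6. Parent writes again, faulting on its swap entry. */
        usleep(100 * 1000);     /* crude: let the child pin and unmap first */
        memset(mem, 'B', pagesize);
        wait(NULL);
        return 0;
}

With the streamlined reuse logic, the extra reference held via the pipe
forces the parent to copy on its write fault, so the child keeps seeing 'A'.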

Let's apply the same COW logic as in do_wp_page(): conditionally try to
remove the page from the swapcache after freeing the swap entry, but
before actually mapping our page.  We can change the order now that we use
try_to_free_swap(), which doesn't care about the mapcount, instead of
reuse_swap_page().

To handle references from the LRU pagevecs, conditionally drain the local
LRU pagevecs when required; to keep it simple for now, however, don't
consider the page_count() when deciding whether to drain.

Link: https://lkml.kernel.org/r/20220131162940.210846-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Liang Zhang <zhangliang5@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/memory.c

index 8e30675..f721735 100644
@@ -3489,6 +3489,25 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
        return 0;
 }
 
+static inline bool should_try_to_free_swap(struct page *page,
+                                          struct vm_area_struct *vma,
+                                          unsigned int fault_flags)
+{
+       if (!PageSwapCache(page))
+               return false;
+       if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) ||
+           PageMlocked(page))
+               return true;
+       /*
+        * If we want to map a page that's in the swapcache writable, we
+        * have to detect via the refcount if we're really the exclusive
+        * user. Try freeing the swapcache to get rid of the swapcache
+        * reference only in case it's likely that we'll be the exclusive user.
+        */
+       return (fault_flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
+               page_count(page) == 2;
+}
+
 /*
  * We enter with non-exclusive mmap_lock (to exclude vma changes,
  * but allow concurrent faults), and pte mapped but not yet locked.
@@ -3630,6 +3649,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                        page = swapcache;
                        goto out_page;
                }
+
+               /*
+                * If we want to map a page that's in the swapcache writable, we
+                * have to detect via the refcount if we're really the exclusive
+                * owner. Try removing the extra reference from the local LRU
+                * pagevecs if required.
+                */
+               if ((vmf->flags & FAULT_FLAG_WRITE) && page == swapcache &&
+                   !PageKsm(page) && !PageLRU(page))
+                       lru_add_drain();
        }
 
        cgroup_throttle_swaprate(page, GFP_KERNEL);
@@ -3648,19 +3677,25 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        }
 
        /*
-        * The page isn't present yet, go ahead with the fault.
-        *
-        * Be careful about the sequence of operations here.
-        * To get its accounting right, reuse_swap_page() must be called
-        * while the page is counted on swap but not yet in mapcount i.e.
-        * before page_add_anon_rmap() and swap_free(); try_to_free_swap()
-        * must be called after the swap_free(), or it will never succeed.
+        * Remove the swap entry and conditionally try to free up the swapcache.
+        * We're already holding a reference on the page but haven't mapped it
+        * yet.
         */
+       swap_free(entry);
+       if (should_try_to_free_swap(page, vma, vmf->flags))
+               try_to_free_swap(page);
 
        inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
        dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
        pte = mk_pte(page, vma->vm_page_prot);
-       if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
+
+       /*
+        * Same logic as in do_wp_page(); however, optimize for fresh pages
+        * that are certainly not shared because we just allocated them without
+        * exposing them to the swapcache.
+        */
+       if ((vmf->flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
+           (page != swapcache || page_count(page) == 1)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                vmf->flags &= ~FAULT_FLAG_WRITE;
                ret |= VM_FAULT_WRITE;
@@ -3686,10 +3721,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
        set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
        arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 
-       swap_free(entry);
-       if (mem_cgroup_swap_full(page) ||
-           (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
-               try_to_free_swap(page);
        unlock_page(page);
        if (page != swapcache && swapcache) {
                /*