mm: reclaim mustn't enter FS for SWP_FS_OPS swap-space
author	NeilBrown <neilb@suse.de>
	Tue, 10 May 2022 01:20:48 +0000 (18:20 -0700)
committer	akpm <akpm@linux-foundation.org>
	Tue, 10 May 2022 01:20:48 +0000 (18:20 -0700)
If swap-out is using filesystem operations (SWP_FS_OPS), then it is not
safe to enter the FS for reclaim.  So only downgrade the requirement for
swap pages to __GFP_IO after checking that SWP_FS_OPS is not being used.

This makes the calculation of "may_enter_fs" slightly more complex, so
move it into a separate function.  With that done, there is little value
in maintaining the bool variable any more, so replace the may_enter_fs
variable with a may_enter_fs() function.  This removes any risk of the
variable becoming out-of-date.
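
For reference, a minimal sketch of the resulting check (it simply mirrors
the may_enter_fs() helper added to mm/vmscan.c in the diff below, using
the new page_swap_flags() accessor added to mm/swap.h):

	static bool may_enter_fs(struct page *page, gfp_t gfp_mask)
	{
		/* __GFP_FS always permits entering the filesystem */
		if (gfp_mask & __GFP_FS)
			return true;
		/* only swap-cache pages may proceed with just __GFP_IO */
		if (!PageSwapCache(page) || !(gfp_mask & __GFP_IO))
			return false;
		/* ...and only when swap-out does not go back into the FS */
		return !data_race(page_swap_flags(page) & SWP_FS_OPS);
	}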

Link: https://lkml.kernel.org/r/164859778124.29473.16176717935781721855.stgit@noble.brown
Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: David Howells <dhowells@redhat.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/swap.h
mm/vmscan.c

index f8265bf..e19f185 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -50,6 +50,10 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
 struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
                              struct vm_fault *vmf);
 
+static inline unsigned int page_swap_flags(struct page *page)
+{
+       return page_swap_info(page)->flags;
+}
 #else /* CONFIG_SWAP */
 static inline int swap_readpage(struct page *page, bool do_poll)
 {
@@ -129,5 +133,9 @@ static inline void clear_shadow_from_swap_cache(int type, unsigned long begin,
 {
 }
 
+static inline unsigned int page_swap_flags(struct page *page)
+{
+       return 0;
+}
 #endif /* CONFIG_SWAP */
 #endif /* _MM_SWAP_H */
index b1b91d5..95ebcb5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1502,6 +1502,22 @@ static unsigned int demote_page_list(struct list_head *demote_pages,
        return nr_succeeded;
 }
 
+static bool may_enter_fs(struct page *page, gfp_t gfp_mask)
+{
+       if (gfp_mask & __GFP_FS)
+               return true;
+       if (!PageSwapCache(page) || !(gfp_mask & __GFP_IO))
+               return false;
+       /*
+        * We can "enter_fs" for swap-cache with only __GFP_IO
+        * providing this isn't SWP_FS_OPS.
+        * ->flags can be updated non-atomically (scan_swap_map_slots),
+        * but that will never affect SWP_FS_OPS, so the data_race
+        * is safe.
+        */
+       return !data_race(page_swap_flags(page) & SWP_FS_OPS);
+}
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -1528,7 +1544,7 @@ retry:
                struct page *page;
                struct folio *folio;
                enum page_references references = PAGEREF_RECLAIM;
-               bool dirty, writeback, may_enter_fs;
+               bool dirty, writeback;
                unsigned int nr_pages;
 
                cond_resched();
@@ -1553,9 +1569,6 @@ retry:
                if (!sc->may_unmap && page_mapped(page))
                        goto keep_locked;
 
-               may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
-                       (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
-
                /*
                 * The number of dirty pages determines if a node is marked
                 * reclaim_congested. kswapd will stall and start writing
@@ -1598,7 +1611,7 @@ retry:
                 *    not to fs). In this case mark the page for immediate
                 *    reclaim and continue scanning.
                 *
-                *    Require may_enter_fs because we would wait on fs, which
+                *    Require may_enter_fs() because we would wait on fs, which
                 *    may not have submitted IO yet. And the loop driver might
                 *    enter reclaim, and deadlock if it waits on a page for
                 *    which it is needed to do the write (loop masks off
@@ -1630,7 +1643,7 @@ retry:
 
                        /* Case 2 above */
                        } else if (writeback_throttling_sane(sc) ||
-                           !PageReclaim(page) || !may_enter_fs) {
+                           !PageReclaim(page) || !may_enter_fs(page, sc->gfp_mask)) {
                                /*
                                 * This is slightly racy - end_page_writeback()
                                 * might have just cleared PageReclaim, then
@@ -1720,8 +1733,6 @@ retry:
                                                goto activate_locked_split;
                                }
 
-                               may_enter_fs = true;
-
                                /* Adding to swap updated mapping */
                                mapping = page_mapping(page);
                        }
@@ -1792,7 +1803,7 @@ retry:
 
                        if (references == PAGEREF_RECLAIM_CLEAN)
                                goto keep_locked;
-                       if (!may_enter_fs)
+                       if (!may_enter_fs(page, sc->gfp_mask))
                                goto keep_locked;
                        if (!sc->may_writepage)
                                goto keep_locked;