
mm/vmscan: centralise timeout values for reclaim_throttle
author     Mel Gorman <mgorman@techsingularity.net>
           Fri, 5 Nov 2021 20:42:42 +0000 (13:42 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 6 Nov 2021 20:30:40 +0000 (13:30 -0700)

Neil Brown raised concerns about callers of reclaim_throttle specifying
a timeout value.  The original timeout values passed to congestion_wait()
were probably pulled out of thin air or copy&pasted from somewhere else.
This patch centralises the timeout values and selects a timeout based on
the reason for reclaim throttling.  These figures are also pulled out of
the same thin air, but better values may be derived.
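
For orientation, the timeouts selected below can be condensed into a single
per-reason lookup.  The sketch below is not part of the patch and the helper
name is made up; the values are the ones added to reclaim_throttle() in
mm/vmscan.c, where HZ/10 is roughly 100ms and HZ/50 roughly 20ms:

	/* Condensed sketch of the per-reason timeouts introduced here. */
	static long reclaim_throttle_timeout(enum vmscan_throttle_state reason)
	{
		switch (reason) {
		case VMSCAN_THROTTLE_WRITEBACK:		/* stalled on writeback */
		case VMSCAN_THROTTLE_NOPROGRESS:	/* reclaim made no progress */
			return HZ / 10;			/* ~100ms */
		case VMSCAN_THROTTLE_ISOLATED:		/* too many pages isolated */
			return HZ / 50;			/* ~20ms */
		default:
			WARN_ON_ONCE(1);
			return HZ;			/* ~1s; should not happen */
		}
	}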

Running a workload that is throttling for inappropriate periods and
tracing mm_vmscan_throttled can be used to pick a more appropriate
value.  Excessive throttling would pick a lower timeout, whereas excessive
CPU usage in reclaim context would select a larger timeout.  Ideally a
large value would always be used and the wakeups would occur before a
timeout, but that requires careful testing.
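
As a rough sketch of how such a trace would be read: reclaim_throttle() in
the mm/vmscan.c hunk below records the requested timeout in microseconds via
jiffies_to_usecs(), and schedule_timeout() returns the jiffies remaining on an
early wakeup, so the tracepoint can also report how long the task actually
stalled.  The helper below is hypothetical and only illustrates interpreting
one sample under that assumption:

	/*
	 * Illustration only: a task woken early stalls for less than the
	 * requested timeout; a task that stalls for the full timeout was
	 * never woken and simply expired.
	 */
	static inline bool throttle_expired(unsigned int usec_timeout,
					    unsigned int usec_delayed)
	{
		return usec_delayed >= usec_timeout;
	}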

Link: https://lkml.kernel.org/r/20211022144651.19914-7-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Darrick J . Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/compaction.c
mm/internal.h
mm/page-writeback.c
mm/vmscan.c

diff --git a/mm/compaction.c b/mm/compaction.c
index 7359093..151b04c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -828,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                if (cc->mode == MIGRATE_ASYNC)
                        return -EAGAIN;
 
-               reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
+               reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);
 
                if (fatal_signal_pending(current))
                        return -EINTR;
diff --git a/mm/internal.h b/mm/internal.h
index 7dfe74f..f3de3a2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -130,8 +130,7 @@ extern unsigned long highest_memmap_pfn;
  */
 extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
-extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
-                                                               long timeout);
+extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
 
 /*
  * in mm/rmap.c:
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f34f54f..4b01a68 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2374,7 +2374,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
                 * guess as any.
                 */
                reclaim_throttle(NODE_DATA(numa_node_id()),
-                       VMSCAN_THROTTLE_WRITEBACK, HZ/50);
+                       VMSCAN_THROTTLE_WRITEBACK);
        }
        /*
         * Usually few pages are written by now from those we've just submitted
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7d3fe59..599e561 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1006,12 +1006,10 @@ static void handle_write_error(struct address_space *mapping,
        unlock_page(page);
 }
 
-void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
-                                                       long timeout)
+void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
 {
        wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
-       long ret;
-       bool acct_writeback = (reason == VMSCAN_THROTTLE_WRITEBACK);
+       long timeout, ret;
        DEFINE_WAIT(wait);
 
        /*
@@ -1023,17 +1021,43 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
            current->flags & (PF_IO_WORKER|PF_KTHREAD))
                return;
 
-       if (acct_writeback &&
-           atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
-               WRITE_ONCE(pgdat->nr_reclaim_start,
-                       node_page_state(pgdat, NR_THROTTLED_WRITTEN));
+       /*
+        * These figures are pulled out of thin air.
+        * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many
+        * parallel reclaimers which is a short-lived event so the timeout is
+        * short. Failing to make progress or waiting on writeback are
+        * potentially long-lived events so use a longer timeout. This is shaky
+        * logic as a failure to make progress could be due to anything from
+        * writeback to a slow device to excessive references pages at the tail
+        * of the inactive LRU.
+        */
+       switch(reason) {
+       case VMSCAN_THROTTLE_WRITEBACK:
+               timeout = HZ/10;
+
+               if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
+                       WRITE_ONCE(pgdat->nr_reclaim_start,
+                               node_page_state(pgdat, NR_THROTTLED_WRITTEN));
+               }
+
+               break;
+       case VMSCAN_THROTTLE_NOPROGRESS:
+               timeout = HZ/10;
+               break;
+       case VMSCAN_THROTTLE_ISOLATED:
+               timeout = HZ/50;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               timeout = HZ;
+               break;
        }
 
        prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
        ret = schedule_timeout(timeout);
        finish_wait(wqh, &wait);
 
-       if (acct_writeback)
+       if (reason == VMSCAN_THROTTLE_WRITEBACK)
                atomic_dec(&pgdat->nr_writeback_throttled);
 
        trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
@@ -2318,7 +2342,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
                /* wait a bit for the reclaimer. */
                stalled = true;
-               reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
+               reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);
 
                /* We are about to die and free our memory. Return now. */
                if (fatal_signal_pending(current))
@@ -3250,7 +3274,7 @@ again:
                 * until some pages complete writeback.
                 */
                if (sc->nr.immediate)
-                       reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
+                       reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
        }
 
        /*
@@ -3274,7 +3298,7 @@ again:
        if (!current_is_kswapd() && current_may_throttle() &&
            !sc->hibernation_mode &&
            test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
-               reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
+               reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
 
        if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
                                    sc))
@@ -3346,7 +3370,7 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
 
        /* Throttle if making no progress at high prioities. */
        if (sc->priority < DEF_PRIORITY - 2)
-               reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS, HZ/10);
+               reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS);
 }
 
 /*