mm: vmscan: replace shrink_node() loop with a retry jump

Most of the function body is inside a loop, which imposes an additional
indentation and scoping level that makes the code a bit hard to follow and
modify.

The looping only happens in the case of reclaim-compaction, which isn't the
common case.  So rather than adding yet another function level to the
reclaim path and having every reclaim invocation go through a level that
only exists for one specific corner case, use a retry goto.

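For illustration, here is a minimal sketch of the pattern, not part of the
patch itself: struct ctx, do_pass() and should_retry() are made-up
stand-ins for the reclaim pass and the reclaim-compaction retry check. The
do-while that wraps the whole body becomes a flat body with an again:
label and a conditional goto, so the common single-pass case no longer
pays the extra indentation level.

/*
 * Schematic sketch only (not the kernel code): the same retry structure
 * written as a wrapping do-while loop vs. a retry goto.
 */
#include <stdbool.h>

struct ctx {
	int passes;
	int wanted;
};

static bool do_pass(struct ctx *c)		/* one reclaim pass (stub) */
{
	c->passes++;
	return true;
}

static bool should_retry(const struct ctx *c)	/* uncommon retry condition */
{
	return c->passes < c->wanted;
}

/* Before: the whole body sits one indentation level deep inside the loop. */
static bool shrink_with_loop(struct ctx *c)
{
	bool progress = false;

	do {
		if (do_pass(c))
			progress = true;
	} while (should_retry(c));

	return progress;
}

/*
 * After: the common single-pass case reads straight through; only the
 * uncommon retry jumps back to the label.
 */
static bool shrink_with_goto(struct ctx *c)
{
	bool progress = false;
again:
	if (do_pass(c))
		progress = true;

	if (should_retry(c))
		goto again;

	return progress;
}

int main(void)
{
	struct ctx a = { 0, 2 }, b = { 0, 2 };

	/* Both variants do the same number of passes; only the shape differs. */
	return (shrink_with_loop(&a) && shrink_with_goto(&b)) ? 0 : 1;
}
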
Link: http://lkml.kernel.org/r/20191022144803.302233-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Authored by Johannes Weiner on 2019-11-30 17:55:43 -08:00; committed by Linus Torvalds
parent b5ead35e7e
commit d2af339706

@@ -2729,144 +2729,143 @@ static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
 static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 {
 	struct reclaim_state *reclaim_state = current->reclaim_state;
+	struct mem_cgroup *root = sc->target_mem_cgroup;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
+	struct mem_cgroup *memcg;
+again:
+	memset(&sc->nr, 0, sizeof(sc->nr));
 
-	do {
-		struct mem_cgroup *root = sc->target_mem_cgroup;
-		struct mem_cgroup *memcg;
-
-		memset(&sc->nr, 0, sizeof(sc->nr));
-
-		nr_reclaimed = sc->nr_reclaimed;
-		nr_scanned = sc->nr_scanned;
+	nr_reclaimed = sc->nr_reclaimed;
+	nr_scanned = sc->nr_scanned;
 
-		memcg = mem_cgroup_iter(root, NULL, NULL);
-		do {
-			unsigned long reclaimed;
-			unsigned long scanned;
+	memcg = mem_cgroup_iter(root, NULL, NULL);
+	do {
+		unsigned long reclaimed;
+		unsigned long scanned;
 
-			switch (mem_cgroup_protected(root, memcg)) {
-			case MEMCG_PROT_MIN:
-				/*
-				 * Hard protection.
-				 * If there is no reclaimable memory, OOM.
-				 */
-				continue;
-			case MEMCG_PROT_LOW:
-				/*
-				 * Soft protection.
-				 * Respect the protection only as long as
-				 * there is an unprotected supply
-				 * of reclaimable memory from other cgroups.
-				 */
-				if (!sc->memcg_low_reclaim) {
-					sc->memcg_low_skipped = 1;
-					continue;
-				}
-				memcg_memory_event(memcg, MEMCG_LOW);
-				break;
-			case MEMCG_PROT_NONE:
-				/*
-				 * All protection thresholds breached. We may
-				 * still choose to vary the scan pressure
-				 * applied based on by how much the cgroup in
-				 * question has exceeded its protection
-				 * thresholds (see get_scan_count).
-				 */
-				break;
-			}
+		switch (mem_cgroup_protected(root, memcg)) {
+		case MEMCG_PROT_MIN:
+			/*
+			 * Hard protection.
+			 * If there is no reclaimable memory, OOM.
+			 */
+			continue;
+		case MEMCG_PROT_LOW:
+			/*
+			 * Soft protection.
+			 * Respect the protection only as long as
+			 * there is an unprotected supply
+			 * of reclaimable memory from other cgroups.
+			 */
+			if (!sc->memcg_low_reclaim) {
+				sc->memcg_low_skipped = 1;
+				continue;
+			}
+			memcg_memory_event(memcg, MEMCG_LOW);
+			break;
+		case MEMCG_PROT_NONE:
+			/*
+			 * All protection thresholds breached. We may
+			 * still choose to vary the scan pressure
+			 * applied based on by how much the cgroup in
+			 * question has exceeded its protection
+			 * thresholds (see get_scan_count).
+			 */
+			break;
+		}
 
-			reclaimed = sc->nr_reclaimed;
-			scanned = sc->nr_scanned;
-			shrink_node_memcg(pgdat, memcg, sc);
+		reclaimed = sc->nr_reclaimed;
+		scanned = sc->nr_scanned;
+		shrink_node_memcg(pgdat, memcg, sc);
 
-			shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
-					sc->priority);
+		shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
+			    sc->priority);
 
-			/* Record the group's reclaim efficiency */
-			vmpressure(sc->gfp_mask, memcg, false,
-				   sc->nr_scanned - scanned,
-				   sc->nr_reclaimed - reclaimed);
+		/* Record the group's reclaim efficiency */
+		vmpressure(sc->gfp_mask, memcg, false,
+			   sc->nr_scanned - scanned,
+			   sc->nr_reclaimed - reclaimed);
 
-		} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
+	} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
 
-		if (reclaim_state) {
-			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-			reclaim_state->reclaimed_slab = 0;
-		}
+	if (reclaim_state) {
+		sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+		reclaim_state->reclaimed_slab = 0;
+	}
 
-		/* Record the subtree's reclaim efficiency */
-		vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
-			   sc->nr_scanned - nr_scanned,
-			   sc->nr_reclaimed - nr_reclaimed);
+	/* Record the subtree's reclaim efficiency */
+	vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+		   sc->nr_scanned - nr_scanned,
+		   sc->nr_reclaimed - nr_reclaimed);
 
-		if (sc->nr_reclaimed - nr_reclaimed)
-			reclaimable = true;
+	if (sc->nr_reclaimed - nr_reclaimed)
+		reclaimable = true;
 
-		if (current_is_kswapd()) {
-			/*
-			 * If reclaim is isolating dirty pages under writeback,
-			 * it implies that the long-lived page allocation rate
-			 * is exceeding the page laundering rate. Either the
-			 * global limits are not being effective at throttling
-			 * processes due to the page distribution throughout
-			 * zones or there is heavy usage of a slow backing
-			 * device. The only option is to throttle from reclaim
-			 * context which is not ideal as there is no guarantee
-			 * the dirtying process is throttled in the same way
-			 * balance_dirty_pages() manages.
-			 *
-			 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
-			 * count the number of pages under pages flagged for
-			 * immediate reclaim and stall if any are encountered
-			 * in the nr_immediate check below.
-			 */
-			if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
-				set_bit(PGDAT_WRITEBACK, &pgdat->flags);
+	if (current_is_kswapd()) {
+		/*
+		 * If reclaim is isolating dirty pages under writeback,
+		 * it implies that the long-lived page allocation rate
+		 * is exceeding the page laundering rate. Either the
+		 * global limits are not being effective at throttling
+		 * processes due to the page distribution throughout
+		 * zones or there is heavy usage of a slow backing
+		 * device. The only option is to throttle from reclaim
+		 * context which is not ideal as there is no guarantee
+		 * the dirtying process is throttled in the same way
+		 * balance_dirty_pages() manages.
+		 *
+		 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+		 * count the number of pages under pages flagged for
+		 * immediate reclaim and stall if any are encountered
+		 * in the nr_immediate check below.
+		 */
+		if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+			set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 
-			/*
-			 * Tag a node as congested if all the dirty pages
-			 * scanned were backed by a congested BDI and
-			 * wait_iff_congested will stall.
-			 */
-			if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-				set_bit(PGDAT_CONGESTED, &pgdat->flags);
+		/*
+		 * Tag a node as congested if all the dirty pages
+		 * scanned were backed by a congested BDI and
+		 * wait_iff_congested will stall.
+		 */
+		if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+			set_bit(PGDAT_CONGESTED, &pgdat->flags);
 
-			/* Allow kswapd to start writing pages during reclaim.*/
-			if (sc->nr.unqueued_dirty == sc->nr.file_taken)
-				set_bit(PGDAT_DIRTY, &pgdat->flags);
+		/* Allow kswapd to start writing pages during reclaim.*/
+		if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+			set_bit(PGDAT_DIRTY, &pgdat->flags);
 
-			/*
-			 * If kswapd scans pages marked marked for immediate
-			 * reclaim and under writeback (nr_immediate), it
-			 * implies that pages are cycling through the LRU
-			 * faster than they are written so also forcibly stall.
-			 */
-			if (sc->nr.immediate)
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
-		}
+		/*
+		 * If kswapd scans pages marked marked for immediate
+		 * reclaim and under writeback (nr_immediate), it
+		 * implies that pages are cycling through the LRU
+		 * faster than they are written so also forcibly stall.
+		 */
+		if (sc->nr.immediate)
+			congestion_wait(BLK_RW_ASYNC, HZ/10);
+	}
 
-		/*
-		 * Legacy memcg will stall in page writeback so avoid forcibly
-		 * stalling in wait_iff_congested().
-		 */
-		if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
-		    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-			set_memcg_congestion(pgdat, root, true);
+	/*
+	 * Legacy memcg will stall in page writeback so avoid forcibly
+	 * stalling in wait_iff_congested().
+	 */
+	if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
+	    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+		set_memcg_congestion(pgdat, root, true);
 
-		/*
-		 * Stall direct reclaim for IO completions if underlying BDIs
-		 * and node is congested. Allow kswapd to continue until it
-		 * starts encountering unqueued dirty pages or cycling through
-		 * the LRU too quickly.
-		 */
-		if (!sc->hibernation_mode && !current_is_kswapd() &&
-		    current_may_throttle() && pgdat_memcg_congested(pgdat, root))
-			wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+	/*
+	 * Stall direct reclaim for IO completions if underlying BDIs
+	 * and node is congested. Allow kswapd to continue until it
+	 * starts encountering unqueued dirty pages or cycling through
+	 * the LRU too quickly.
+	 */
+	if (!sc->hibernation_mode && !current_is_kswapd() &&
+	    current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+		wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 
-	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
-					 sc));
+	if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
+				    sc))
+		goto again;
 
 	/*
 	 * Kswapd gives up on balancing particular nodes after too