diff --git a/mm/zswap.c b/mm/zswap.c index 2bf4bf1d356c..35da20d3617f 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1207,10 +1207,12 @@ static int zswap_writeback_entry(struct zswap_entry *entry, /* * folio is locked, and the swapcache is now secured against - * concurrent swapping to and from the slot. Verify that the - * swap entry hasn't been invalidated and recycled behind our - * backs (our zswap_entry reference doesn't prevent that), to - * avoid overwriting a new swap folio with old compressed data. + * concurrent swapping to and from the slot, and concurrent + * swapoff so we can safely dereference the zswap tree here. + * Verify that the swap entry hasn't been invalidated and recycled + * behind our backs, to avoid overwriting a new swap folio with + * old compressed data. Only when this is successful can the entry + * be dereferenced. */ tree = swap_zswap_tree(swpentry); spin_lock(&tree->lock); @@ -1263,22 +1265,29 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o int writeback_result; /* - * Rotate the entry to the tail before unlocking the LRU, - * so that in case of an invalidation race concurrent - * reclaimers don't waste their time on it. + * As soon as we drop the LRU lock, the entry can be freed by + * a concurrent invalidation. This means the following: * - * If writeback succeeds, or failure is due to the entry - * being invalidated by the swap subsystem, the invalidation - * will unlink and free it. + * 1. We extract the swp_entry_t to the stack, allowing + * zswap_writeback_entry() to pin the swap entry and + * then validate the zwap entry against that swap entry's + * tree using pointer value comparison. Only when that + * is successful can the entry be dereferenced. * - * Temporary failures, where the same entry should be tried - * again immediately, almost never happen for this shrinker. - * We don't do any trylocking; -ENOMEM comes closest, - * but that's extremely rare and doesn't happen spuriously - * either. Don't bother distinguishing this case. + * 2. Usually, objects are taken off the LRU for reclaim. In + * this case this isn't possible, because if reclaim fails + * for whatever reason, we have no means of knowing if the + * entry is alive to put it back on the LRU. * - * But since they do exist in theory, the entry cannot just - * be unlinked, or we could leak it. Hence, rotate. + * So rotate it before dropping the lock. If the entry is + * written back or invalidated, the free path will unlink + * it. For failures, rotation is the right thing as well. + * + * Temporary failures, where the same entry should be tried + * again immediately, almost never happen for this shrinker. + * We don't do any trylocking; -ENOMEM comes closest, + * but that's extremely rare and doesn't happen spuriously + * either. Don't bother distinguishing this case. */ list_move_tail(item, &l->list);