linux-stable/include/linux/mm_inline.h
Linus Torvalds 9030fb0bb9 Folio changes for 5.18
- Rewrite how munlock works to massively reduce the contention
    on i_mmap_rwsem (Hugh Dickins):
    https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/
  - Sort out the page refcount mess for ZONE_DEVICE pages (Christoph Hellwig):
    https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/
  - Convert GUP to use folios and make pincount available for order-1
    pages. (Matthew Wilcox)
  - Convert a few more truncation functions to use folios (Matthew Wilcox)
  - Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew Wilcox)
  - Convert rmap_walk to use folios (Matthew Wilcox)
  - Convert most of shrink_page_list() to use a folio (Matthew Wilcox)
  - Add support for creating large folios in readahead (Matthew Wilcox)
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCgAdFiEEejHryeLBw/spnjHrDpNsjXcpgj4FAmI4ucgACgkQDpNsjXcp
 gj69Wgf6AwqwmO5Tmy+fLScDPqWxmXJofbocae1kyoGHf7Ui91OK4U2j6IpvAr+g
 P/vLIK+JAAcTQcrSCjymuEkf4HkGZOR03QQn7maPIEe4eLrZRQDEsmHC1L9gpeJp
 s/GMvDWiGE0Tnxu0EOzfVi/yT+qjIl/S8VvqtCoJv1HdzxitZ7+1RDuqImaMC5MM
 Qi3uHag78vLmCltLXpIOdpgZhdZexCdL2Y/1npf+b6FVkAJRRNUnA0gRbS7YpoVp
 CbxEJcmAl9cpJLuj5i5kIfS9trr+/QcvbUlzRxh4ggC58iqnmF2V09l2MJ7YU3XL
 v1O/Elq4lRhXninZFQEm9zjrri7LDQ==
 =n9Ad
 -----END PGP SIGNATURE-----

Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache

Pull folio updates from Matthew Wilcox:

 - Rewrite how munlock works to massively reduce the contention on
   i_mmap_rwsem (Hugh Dickins):

     https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/

 - Sort out the page refcount mess for ZONE_DEVICE pages (Christoph
   Hellwig):

     https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/

 - Convert GUP to use folios and make pincount available for order-1
   pages. (Matthew Wilcox)

 - Convert a few more truncation functions to use folios (Matthew
   Wilcox)

 - Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew
   Wilcox)

 - Convert rmap_walk to use folios (Matthew Wilcox)

 - Convert most of shrink_page_list() to use a folio (Matthew Wilcox)

 - Add support for creating large folios in readahead (Matthew Wilcox)

* tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits)
  mm/damon: minor cleanup for damon_pa_young
  selftests/vm/transhuge-stress: Support file-backed PMD folios
  mm/filemap: Support VM_HUGEPAGE for file mappings
  mm/readahead: Switch to page_cache_ra_order
  mm/readahead: Align file mappings for non-DAX
  mm/readahead: Add large folio readahead
  mm: Support arbitrary THP sizes
  mm: Make large folios depend on THP
  mm: Fix READ_ONLY_THP warning
  mm/filemap: Allow large folios to be added to the page cache
  mm: Turn can_split_huge_page() into can_split_folio()
  mm/vmscan: Convert pageout() to take a folio
  mm/vmscan: Turn page_check_references() into folio_check_references()
  mm/vmscan: Account large folios correctly
  mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios
  mm/vmscan: Free non-shmem folios without splitting them
  mm/rmap: Constify the rmap_walk_control argument
  mm/rmap: Convert rmap_walk() to take a folio
  mm: Turn page_anon_vma() into folio_anon_vma()
  mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read()
  ...
2022-03-22 17:03:12 -07:00

320 lines
8.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_MM_INLINE_H
#define LINUX_MM_INLINE_H
#include <linux/atomic.h>
#include <linux/huge_mm.h>
#include <linux/swap.h>
#include <linux/string.h>
/**
 * folio_is_file_lru - Pick between the file LRU and the anon LRU for a folio.
 * @folio: The folio to classify.
 *
 * The answer is derived from a page flag because it has to remain valid
 * until the folio is removed from the LRU for the last time, and that can
 * happen as late as __page_cache_release.
 *
 * Return: An int, deliberately not a bool, because the value is used to
 * sort the folio onto the right LRU list and to account it correctly.
 * 1 when @folio is not swap backed: a regular filesystem backed page cache
 * folio, or a lazily freed anonymous folio (e.g. via MADV_FREE).
 * 0 when @folio is swap backed: a normal anonymous folio, a tmpfs folio or
 * an otherwise ram or swap backed folio.
 */
static inline int folio_is_file_lru(struct folio *folio)
{
	if (folio_test_swapbacked(folio))
		return 0;

	return 1;
}
/* Page flavour of folio_is_file_lru(): answers for the folio holding @page. */
static inline int page_is_file_lru(struct page *page)
{
	struct folio *folio = page_folio(page);

	return folio_is_file_lru(folio);
}
/*
 * Apply a size change of @nr_pages pages (negative when pages leave the
 * list) to the accounting of LRU list @lru:
 *  - the lruvec counter NR_LRU_BASE + @lru,
 *  - the NR_ZONE_LRU_BASE + @lru counter of zone @zid on the lruvec's node,
 *  - and, with CONFIG_MEMCG, the memcg LRU size.
 */
static __always_inline void update_lru_size(struct lruvec *lruvec,
				enum lru_list lru, enum zone_type zid,
				long nr_pages)
{
	struct pglist_data *node = lruvec_pgdat(lruvec);

	__mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
	__mod_zone_page_state(node->node_zones + zid,
			      NR_ZONE_LRU_BASE + lru, nr_pages);
#ifdef CONFIG_MEMCG
	mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
#endif
}
/**
 * __folio_clear_lru_flags - Drop the LRU related flags of a folio that is
 * about to be released.
 * @folio: The folio that was on lru and now has a zero reference.
 *
 * The LRU flag is always cleared. The active and unevictable flags are
 * cleared as well, unless both are set at once.
 */
static __always_inline void __folio_clear_lru_flags(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio);

	__folio_clear_lru(folio);

	/*
	 * Active and unevictable set together shouldn't happen; in that
	 * case the flags are left alone for bad_page() to report.
	 */
	if (!folio_test_active(folio) || !folio_test_unevictable(folio)) {
		__folio_clear_active(folio);
		__folio_clear_unevictable(folio);
	}
}
/* Page flavour of __folio_clear_lru_flags(): acts on the folio holding @page. */
static __always_inline void __clear_page_lru_flags(struct page *page)
{
	struct folio *folio = page_folio(page);

	__folio_clear_lru_flags(folio);
}
/**
* folio_lru_list - Which LRU list should a folio be on?
* @folio: The folio to test.
*
* Return: The LRU list a folio should be on, as an index
* into the array of LRU lists.
*/
static __always_inline enum lru_list folio_lru_list(struct folio *folio)
{
enum lru_list lru;
VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio);
if (folio_test_unevictable(folio))
return LRU_UNEVICTABLE;
lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
if (folio_test_active(folio))
lru += LRU_ACTIVE;
return lru;
}
static __always_inline
void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
{
enum lru_list lru = folio_lru_list(folio);
update_lru_size(lruvec, lru, folio_zonenum(folio),
folio_nr_pages(folio));
if (lru != LRU_UNEVICTABLE)
list_add(&folio->lru, &lruvec->lists[lru]);
}
/* Page flavour of lruvec_add_folio(): adds the folio holding @page. */
static __always_inline void add_page_to_lru_list(struct page *page,
				struct lruvec *lruvec)
{
	struct folio *folio = page_folio(page);

	lruvec_add_folio(lruvec, folio);
}
static __always_inline
void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
{
enum lru_list lru = folio_lru_list(folio);
update_lru_size(lruvec, lru, folio_zonenum(folio),
folio_nr_pages(folio));
/* This is not expected to be used on LRU_UNEVICTABLE */
list_add_tail(&folio->lru, &lruvec->lists[lru]);
}
/* Page flavour of lruvec_add_folio_tail(): adds the folio holding @page. */
static __always_inline void add_page_to_lru_list_tail(struct page *page,
				struct lruvec *lruvec)
{
	struct folio *folio = page_folio(page);

	lruvec_add_folio_tail(lruvec, folio);
}
static __always_inline
void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
{
enum lru_list lru = folio_lru_list(folio);
if (lru != LRU_UNEVICTABLE)
list_del(&folio->lru);
update_lru_size(lruvec, lru, folio_zonenum(folio),
-folio_nr_pages(folio));
}
/* Page flavour of lruvec_del_folio(): removes the folio holding @page. */
static __always_inline void del_page_from_lru_list(struct page *page,
				struct lruvec *lruvec)
{
	struct folio *folio = page_folio(page);

	lruvec_del_folio(lruvec, folio);
}
#ifdef CONFIG_ANON_VMA_NAME
/*
* mmap_lock should be read-locked when calling anon_vma_name(). Caller should
* either keep holding the lock while using the returned pointer or it should
* raise anon_vma_name refcount before releasing the lock.
*/
extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma);
/*
* Takes the name string; anon_vma_name_reuse() calls this with an existing
* object's name when that object's refcount is too close to saturation to be
* shared. NOTE(review): presumably allocates a fresh object - confirm against
* the definition.
*/
extern struct anon_vma_name *anon_vma_name_alloc(const char *name);
/* Release callback handed to kref_put() by anon_vma_name_put(). */
extern void anon_vma_name_free(struct kref *kref);
/*
 * Take a reference on @anon_name; a NULL @anon_name is ignored.
 * mmap_lock should be read-locked.
 */
static inline void anon_vma_name_get(struct anon_vma_name *anon_name)
{
	if (!anon_name)
		return;

	kref_get(&anon_name->kref);
}
/*
 * Drop a reference on @anon_name, with anon_vma_name_free() as the kref
 * release callback. A NULL @anon_name is ignored.
 */
static inline void anon_vma_name_put(struct anon_vma_name *anon_name)
{
	if (!anon_name)
		return;

	kref_put(&anon_name->kref, anon_vma_name_free);
}
/*
 * Hand out @anon_name for one more user. Normally this takes a new
 * reference on the same object; once the refcount has reached
 * REFCOUNT_MAX, the result of anon_vma_name_alloc() for the same name is
 * returned instead.
 */
static inline
struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name)
{
	/* Prevent anon_name refcount saturation early on */
	if (kref_read(&anon_name->kref) >= REFCOUNT_MAX)
		return anon_vma_name_alloc(anon_name->name);

	anon_vma_name_get(anon_name);
	return anon_name;
}
/*
 * Give @new_vma the anonymous name of @orig_vma, if it has one. When
 * @orig_vma has no name, @new_vma->anon_name is left untouched.
 */
static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
				     struct vm_area_struct *new_vma)
{
	struct anon_vma_name *name = anon_vma_name(orig_vma);

	if (!name)
		return;

	new_vma->anon_name = anon_vma_name_reuse(name);
}
/*
 * Drop the reference @vma holds on its anonymous name. VMAs with a
 * vm_file are skipped.
 */
static inline void free_anon_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		return;

	/*
	 * vma->anon_name is read directly rather than via anon_vma_name(),
	 * because that generates a warning if mmap_lock is not held, which
	 * might be the case here.
	 */
	anon_vma_name_put(vma->anon_name);
}
/*
 * Compare two anonymous VMA names. The same pointer (including two NULLs)
 * is equal; one NULL and one non-NULL are different; otherwise the name
 * strings decide.
 */
static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
				    struct anon_vma_name *anon_name2)
{
	if (anon_name1 == anon_name2)
		return true;

	if (!anon_name1 || !anon_name2)
		return false;

	return strcmp(anon_name1->name, anon_name2->name) == 0;
}
#else /* CONFIG_ANON_VMA_NAME */
/*
 * Stubs used when CONFIG_ANON_VMA_NAME is not set: lookups and allocation
 * yield NULL, the reference and lifetime helpers do nothing, and any two
 * names compare equal.
 */
static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
{
	return NULL;
}

static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
{
	return NULL;
}

static inline void anon_vma_name_get(struct anon_vma_name *anon_name)
{
}

static inline void anon_vma_name_put(struct anon_vma_name *anon_name)
{
}

static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
				     struct vm_area_struct *new_vma)
{
}

static inline void free_anon_vma_name(struct vm_area_struct *vma)
{
}

static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
				    struct anon_vma_name *anon_name2)
{
	return true;
}
#endif /* CONFIG_ANON_VMA_NAME */
/* Reset the pending TLB flush counter of @mm to zero. */
static inline void init_tlb_flush_pending(struct mm_struct *mm)
{
atomic_set(&mm->tlb_flush_pending, 0);
}
/*
* Count one more pending TLB flush for @mm by incrementing
* mm->tlb_flush_pending. The comment in the body describes the ordering
* against the PTL that readers of the counter rely on.
*/
static inline void inc_tlb_flush_pending(struct mm_struct *mm)
{
atomic_inc(&mm->tlb_flush_pending);
/*
* The only time this value is relevant is when there are indeed pages
* to flush. And we'll only flush pages after changing them, which
* requires the PTL.
*
* So the ordering here is:
*
* atomic_inc(&mm->tlb_flush_pending);
* spin_lock(&ptl);
* ...
* set_pte_at();
* spin_unlock(&ptl);
*
* spin_lock(&ptl)
* mm_tlb_flush_pending();
* ....
* spin_unlock(&ptl);
*
* flush_tlb_range();
* atomic_dec(&mm->tlb_flush_pending);
*
* Where the increment is constrained by the PTL unlock, it thus
* ensures that the increment is visible if the PTE modification is
* visible. After all, if there is no PTE modification, nobody cares
* about TLB flushes either.
*
* This very much relies on users (mm_tlb_flush_pending() and
* mm_tlb_flush_nested()) only caring about _specific_ PTEs (and
* therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc
* locks (PPC) the unlock of one doesn't order against the lock of
* another PTL.
*
* The decrement is ordered by the flush_tlb_range(), such that
* mm_tlb_flush_pending() will not return false unless all flushes have
* completed.
*/
}
/*
* Count one pending TLB flush of @mm as finished by decrementing
* mm->tlb_flush_pending; the counterpart of inc_tlb_flush_pending().
*/
static inline void dec_tlb_flush_pending(struct mm_struct *mm)
{
/*
* See inc_tlb_flush_pending().
*
* This cannot be smp_mb__before_atomic() because smp_mb() simply does
* not order against TLB invalidate completion, which is what we need.
*
* Therefore we must rely on tlb_flush_*() to guarantee order.
*/
atomic_dec(&mm->tlb_flush_pending);
}
/* Report whether the pending TLB flush counter of @mm is non-zero. */
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
	/*
	 * Must be called after having acquired the PTL; orders against that
	 * PTLs release and therefore ensures that if we observe the modified
	 * PTE we must also observe the increment from inc_tlb_flush_pending().
	 *
	 * That is, it only guarantees to return true if there is a flush
	 * pending for _this_ PTL.
	 */
	return atomic_read(&mm->tlb_flush_pending) != 0;
}
/*
 * Report whether more than one TLB flush is counted as pending for @mm,
 * i.e. whether the pending counter is at least two.
 */
static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
{
	/*
	 * Similar to mm_tlb_flush_pending(), we must have acquired the PTL
	 * for which there is a TLB flush pending in order to guarantee
	 * we've seen both that PTE modification and the increment.
	 *
	 * (no requirement on actually still holding the PTL, that is irrelevant)
	 */
	return atomic_read(&mm->tlb_flush_pending) >= 2;
}
#endif