diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 48df896d5b79..e95b2c8081eb 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -26,6 +26,8 @@ void __tlb_remove_table(void *_table); static inline void tlb_flush(struct mmu_gather *tlb); static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, bool delay_rmap, int page_size); +static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, + struct page *page, unsigned int nr_pages, bool delay_rmap); #define tlb_flush tlb_flush #define pte_free_tlb pte_free_tlb @@ -52,6 +54,21 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, return false; } +static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb, + struct page *page, unsigned int nr_pages, bool delay_rmap) +{ + struct encoded_page *encoded_pages[] = { + encode_page(page, ENCODED_PAGE_BIT_NR_PAGES_NEXT), + encode_nr_pages(nr_pages), + }; + + VM_WARN_ON_ONCE(delay_rmap); + VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1)); + + free_pages_and_swap_cache(encoded_pages, ARRAY_SIZE(encoded_pages)); + return false; +} + static inline void tlb_flush(struct mmu_gather *tlb) { __tlb_flush_mm_lazy(tlb->mm); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 127a8230a40a..709830274b75 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -69,6 +69,7 @@ * * - tlb_remove_page() / __tlb_remove_page() * - tlb_remove_page_size() / __tlb_remove_page_size() + * - __tlb_remove_folio_pages() * * __tlb_remove_page_size() is the basic primitive that queues a page for * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a @@ -78,6 +79,11 @@ * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and has no return value. * + * __tlb_remove_folio_pages() is similar to __tlb_remove_page(), however, + * instead of removing a single page, remove the given number of consecutive + * pages that are all part of the same (large) folio: just like calling + * __tlb_remove_page() on each page individually. + * * - tlb_change_page_size() * * call before __tlb_remove_page*() to set the current page-size; implies a @@ -262,6 +268,8 @@ struct mmu_gather_batch { extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, bool delay_rmap, int page_size); +bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page, + unsigned int nr_pages, bool delay_rmap); #ifdef CONFIG_SMP /* diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1b89eec0d6df..a7223ba3ea1e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -226,6 +226,15 @@ struct encoded_page; /* Perform rmap removal after we have flushed the TLB. */ #define ENCODED_PAGE_BIT_DELAY_RMAP 1ul +/* + * The next item in an encoded_page array is the "nr_pages" argument, specifying + * the number of consecutive pages starting from this page, that all belong to + * the same folio. For example, "nr_pages" corresponds to the number of folio + * references that must be dropped. If this bit is not set, "nr_pages" is + * implicitly 1. + */ +#define ENCODED_PAGE_BIT_NR_PAGES_NEXT 2ul + static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags) { BUILD_BUG_ON(flags > ENCODED_PAGE_BITS); @@ -242,6 +251,17 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page); } +static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr) +{ + VM_WARN_ON_ONCE((nr << 2) >> 2 != nr); + return (struct encoded_page *)(nr << 2); +} + +static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page) +{ + return ((unsigned long)page) >> 2; +} + /* * A swap entry has to fit into a "unsigned long", as the entry is hidden * in the "index" field of the swapper address space. diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 6540c99c6758..d175c0f1e2c8 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -50,12 +50,21 @@ static bool tlb_next_batch(struct mmu_gather *tlb) #ifdef CONFIG_SMP static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma) { + struct encoded_page **pages = batch->encoded_pages; + for (int i = 0; i < batch->nr; i++) { - struct encoded_page *enc = batch->encoded_pages[i]; + struct encoded_page *enc = pages[i]; if (encoded_page_flags(enc) & ENCODED_PAGE_BIT_DELAY_RMAP) { struct page *page = encoded_page_ptr(enc); - folio_remove_rmap_pte(page_folio(page), page, vma); + unsigned int nr_pages = 1; + + if (unlikely(encoded_page_flags(enc) & + ENCODED_PAGE_BIT_NR_PAGES_NEXT)) + nr_pages = encoded_nr_pages(pages[++i]); + + folio_remove_rmap_ptes(page_folio(page), page, nr_pages, + vma); } } } @@ -89,18 +98,26 @@ static void tlb_batch_pages_flush(struct mmu_gather *tlb) for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { struct encoded_page **pages = batch->encoded_pages; - do { + while (batch->nr) { /* * limit free batch count when PAGE_SIZE > 4K */ unsigned int nr = min(512U, batch->nr); + /* + * Make sure we cover page + nr_pages, and don't leave + * nr_pages behind when capping the number of entries. + */ + if (unlikely(encoded_page_flags(pages[nr - 1]) & + ENCODED_PAGE_BIT_NR_PAGES_NEXT)) + nr++; + free_pages_and_swap_cache(pages, nr); pages += nr; batch->nr -= nr; cond_resched(); - } while (batch->nr); + } } tlb->active = &tlb->local; } @@ -116,8 +133,9 @@ static void tlb_batch_list_free(struct mmu_gather *tlb) tlb->local.next = NULL; } -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, - bool delay_rmap, int page_size) +static bool __tlb_remove_folio_pages_size(struct mmu_gather *tlb, + struct page *page, unsigned int nr_pages, bool delay_rmap, + int page_size) { int flags = delay_rmap ? ENCODED_PAGE_BIT_DELAY_RMAP : 0; struct mmu_gather_batch *batch; @@ -126,6 +144,8 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, #ifdef CONFIG_MMU_GATHER_PAGE_SIZE VM_WARN_ON(tlb->page_size != page_size); + VM_WARN_ON_ONCE(nr_pages != 1 && page_size != PAGE_SIZE); + VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1)); #endif batch = tlb->active; @@ -133,17 +153,40 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, * Add the page and check if we are full. If so * force a flush. */ - batch->encoded_pages[batch->nr++] = encode_page(page, flags); - if (batch->nr == batch->max) { + if (likely(nr_pages == 1)) { + batch->encoded_pages[batch->nr++] = encode_page(page, flags); + } else { + flags |= ENCODED_PAGE_BIT_NR_PAGES_NEXT; + batch->encoded_pages[batch->nr++] = encode_page(page, flags); + batch->encoded_pages[batch->nr++] = encode_nr_pages(nr_pages); + } + /* + * Make sure that we can always add another "page" + "nr_pages", + * requiring two entries instead of only a single one. + */ + if (batch->nr >= batch->max - 1) { if (!tlb_next_batch(tlb)) return true; batch = tlb->active; } - VM_BUG_ON_PAGE(batch->nr > batch->max, page); + VM_BUG_ON_PAGE(batch->nr > batch->max - 1, page); return false; } +bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page, + unsigned int nr_pages, bool delay_rmap) +{ + return __tlb_remove_folio_pages_size(tlb, page, nr_pages, delay_rmap, + PAGE_SIZE); +} + +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, + bool delay_rmap, int page_size) +{ + return __tlb_remove_folio_pages_size(tlb, page, 1, delay_rmap, page_size); +} + #endif /* MMU_GATHER_NO_GATHER */ #ifdef CONFIG_MMU_GATHER_TABLE_FREE diff --git a/mm/swap.c b/mm/swap.c index cd8f0150ba3a..e5380d732c0d 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -967,11 +967,17 @@ void release_pages(release_pages_arg arg, int nr) unsigned int lock_batch; for (i = 0; i < nr; i++) { + unsigned int nr_refs = 1; struct folio *folio; /* Turn any of the argument types into a folio */ folio = page_folio(encoded_page_ptr(encoded[i])); + /* Is our next entry actually "nr_pages" -> "nr_refs" ? */ + if (unlikely(encoded_page_flags(encoded[i]) & + ENCODED_PAGE_BIT_NR_PAGES_NEXT)) + nr_refs = encoded_nr_pages(encoded[++i]); + /* * Make sure the IRQ-safe lock-holding time does not get * excessive with a continuous string of pages from the @@ -990,14 +996,14 @@ void release_pages(release_pages_arg arg, int nr) unlock_page_lruvec_irqrestore(lruvec, flags); lruvec = NULL; } - if (put_devmap_managed_page(&folio->page)) + if (put_devmap_managed_page_refs(&folio->page, nr_refs)) continue; - if (folio_put_testzero(folio)) + if (folio_ref_sub_and_test(folio, nr_refs)) free_zone_device_page(&folio->page); continue; } - if (!folio_put_testzero(folio)) + if (!folio_ref_sub_and_test(folio, nr_refs)) continue; if (folio_test_large(folio)) { diff --git a/mm/swap_state.c b/mm/swap_state.c index 7255c01a1e4e..2f540748f7c0 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -311,8 +311,19 @@ void free_page_and_swap_cache(struct page *page) void free_pages_and_swap_cache(struct encoded_page **pages, int nr) { lru_add_drain(); - for (int i = 0; i < nr; i++) - free_swap_cache(encoded_page_ptr(pages[i])); + for (int i = 0; i < nr; i++) { + struct page *page = encoded_page_ptr(pages[i]); + + /* + * Skip over the "nr_pages" entry. It's sufficient to call + * free_swap_cache() only once per folio. + */ + if (unlikely(encoded_page_flags(pages[i]) & + ENCODED_PAGE_BIT_NR_PAGES_NEXT)) + i++; + + free_swap_cache(page); + } release_pages(pages, nr); }