Merge branch 'for-next/faultaround' into for-next/core

Initialise prefaulted PTEs as 'old' for arm64 when hardware access-flag
updates are supported, which drastically improves vmscan performance.

* for-next/faultaround:
  mm: filemap: Fix microblaze build failure with 'mmu_defconfig'
  mm/nommu: Fix return type of filemap_map_pages()
  mm: Mark anonymous struct field of 'struct vm_fault' as 'const'
  mm: Use static initialisers for immutable fields of 'struct vm_fault'
  mm: Avoid modifying vmf.address in __collapse_huge_page_swapin()
  mm: Pass 'address' to map to do_set_pte() and drop FAULT_FLAG_PREFAULT
  mm: Move immutable fields of 'struct vm_fault' into anonymous struct
  arm64: mm: Implement arch_wants_old_prefaulted_pte()
  mm: Allow architectures to request 'old' entries when prefaulting
  mm: Cleanup faultaround and finish_fault() codepaths
This commit is contained in:
Will Deacon 2021-02-12 14:59:10 +00:00
commit 6b76c3aedb
10 changed files with 282 additions and 229 deletions

View file

@ -980,7 +980,17 @@ static inline bool arch_faults_on_old_pte(void)
return !cpu_has_hw_af();
}
#define arch_faults_on_old_pte arch_faults_on_old_pte
#define arch_faults_on_old_pte arch_faults_on_old_pte
/*
* Experimentally, it's cheap to set the access flag in hardware and we
* benefit from prefaulting mappings as 'old' to start with.
*/
static inline bool arch_wants_old_prefaulted_pte(void)
{
return !arch_faults_on_old_pte();
}
#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
#endif /* !__ASSEMBLY__ */

View file

@ -1319,17 +1319,19 @@ xfs_filemap_pfn_mkwrite(
return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}
static void
static vm_fault_t
xfs_filemap_map_pages(
struct vm_fault *vmf,
pgoff_t start_pgoff,
pgoff_t end_pgoff)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
vm_fault_t ret;
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
filemap_map_pages(vmf, start_pgoff, end_pgoff);
ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
return ret;
}
static const struct vm_operations_struct xfs_file_vm_ops = {

View file

@ -514,11 +514,14 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags)
* pgoff should be used in favour of virtual_address, if possible.
*/
struct vm_fault {
struct vm_area_struct *vma; /* Target VMA */
unsigned int flags; /* FAULT_FLAG_xxx flags */
gfp_t gfp_mask; /* gfp mask to be used for allocations */
pgoff_t pgoff; /* Logical page offset based on vma */
unsigned long address; /* Faulting virtual address */
const struct {
struct vm_area_struct *vma; /* Target VMA */
gfp_t gfp_mask; /* gfp mask to be used for allocations */
pgoff_t pgoff; /* Logical page offset based on vma */
unsigned long address; /* Faulting virtual address */
};
unsigned int flags; /* FAULT_FLAG_xxx flags
* XXX: should really be 'const' */
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address' */
pud_t *pud; /* Pointer to pud entry matching
@ -542,8 +545,8 @@ struct vm_fault {
* is not NULL, otherwise pmd.
*/
pgtable_t prealloc_pte; /* Pre-allocated pte page table.
* vm_ops->map_pages() calls
* alloc_set_pte() from atomic context.
* vm_ops->map_pages() sets up a page
* table from atomic context.
* do_fault_around() pre-allocates
* page table to avoid allocation from
* atomic context.
@ -578,7 +581,7 @@ struct vm_operations_struct {
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf,
enum page_entry_size pe_size);
void (*map_pages)(struct vm_fault *vmf,
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
unsigned long (*pagesize)(struct vm_area_struct * area);
@ -988,7 +991,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
return pte;
}
vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page);
vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr);
vm_fault_t finish_fault(struct vm_fault *vmf);
vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
#endif
@ -2622,7 +2627,7 @@ extern void truncate_inode_pages_final(struct address_space *);
/* generic vm_area_ops exported for stackable file systems */
extern vm_fault_t filemap_fault(struct vm_fault *vmf);
extern void filemap_map_pages(struct vm_fault *vmf,
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);

View file

@ -1314,6 +1314,17 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
#endif
}
/*
* the ordering of these checks is important for pmds with _page_devmap set.
* if we check pmd_trans_unstable() first we will trip the bad_pmd() check
* inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly
* returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
*/
static inline int pmd_devmap_trans_unstable(pmd_t *pmd)
{
return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
}
#ifndef CONFIG_NUMA_BALANCING
/*
* Technically a PTE can be PROTNONE even when not doing NUMA balancing but

View file

@ -42,6 +42,8 @@
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
@ -2911,74 +2913,163 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_fault);
void filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
{
struct file *file = vmf->vma->vm_file;
struct mm_struct *mm = vmf->vma->vm_mm;
/* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*vmf->pmd)) {
unlock_page(page);
put_page(page);
return true;
}
if (pmd_none(*vmf->pmd) && PageTransHuge(page)) {
vm_fault_t ret = do_set_pmd(vmf, page);
if (!ret) {
/* The page is mapped successfully, reference consumed. */
unlock_page(page);
return true;
}
}
if (pmd_none(*vmf->pmd)) {
vmf->ptl = pmd_lock(mm, vmf->pmd);
if (likely(pmd_none(*vmf->pmd))) {
mm_inc_nr_ptes(mm);
pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
vmf->prealloc_pte = NULL;
}
spin_unlock(vmf->ptl);
}
/* See comment in handle_pte_fault() */
if (pmd_devmap_trans_unstable(vmf->pmd)) {
unlock_page(page);
put_page(page);
return true;
}
return false;
}
static struct page *next_uptodate_page(struct page *page,
struct address_space *mapping,
struct xa_state *xas, pgoff_t end_pgoff)
{
unsigned long max_idx;
do {
if (!page)
return NULL;
if (xas_retry(xas, page))
continue;
if (xa_is_value(page))
continue;
if (PageLocked(page))
continue;
if (!page_cache_get_speculative(page))
continue;
/* Has the page moved or been split? */
if (unlikely(page != xas_reload(xas)))
goto skip;
if (!PageUptodate(page) || PageReadahead(page))
goto skip;
if (PageHWPoison(page))
goto skip;
if (!trylock_page(page))
goto skip;
if (page->mapping != mapping)
goto unlock;
if (!PageUptodate(page))
goto unlock;
max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
if (xas->xa_index >= max_idx)
goto unlock;
return page;
unlock:
unlock_page(page);
skip:
put_page(page);
} while ((page = xas_next_entry(xas, end_pgoff)) != NULL);
return NULL;
}
static inline struct page *first_map_page(struct address_space *mapping,
struct xa_state *xas,
pgoff_t end_pgoff)
{
return next_uptodate_page(xas_find(xas, end_pgoff),
mapping, xas, end_pgoff);
}
static inline struct page *next_map_page(struct address_space *mapping,
struct xa_state *xas,
pgoff_t end_pgoff)
{
return next_uptodate_page(xas_next_entry(xas, end_pgoff),
mapping, xas, end_pgoff);
}
vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
struct vm_area_struct *vma = vmf->vma;
struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
unsigned long addr;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct page *head, *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
vm_fault_t ret = 0;
rcu_read_lock();
xas_for_each(&xas, head, end_pgoff) {
if (xas_retry(&xas, head))
continue;
if (xa_is_value(head))
goto next;
head = first_map_page(mapping, &xas, end_pgoff);
if (!head)
goto out;
/*
* Check for a locked page first, as a speculative
* reference may adversely influence page migration.
*/
if (PageLocked(head))
goto next;
if (!page_cache_get_speculative(head))
goto next;
if (filemap_map_pmd(vmf, head)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
/* Has the page moved or been split? */
if (unlikely(head != xas_reload(&xas)))
goto skip;
addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
do {
page = find_subpage(head, xas.xa_index);
if (!PageUptodate(head) ||
PageReadahead(page) ||
PageHWPoison(page))
goto skip;
if (!trylock_page(head))
goto skip;
if (head->mapping != mapping || !PageUptodate(head))
goto unlock;
max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
if (xas.xa_index >= max_idx)
if (PageHWPoison(page))
goto unlock;
if (mmap_miss > 0)
mmap_miss--;
vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
if (vmf->pte)
vmf->pte += xas.xa_index - last_pgoff;
addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
vmf->pte += xas.xa_index - last_pgoff;
last_pgoff = xas.xa_index;
if (alloc_set_pte(vmf, page))
if (!pte_none(*vmf->pte))
goto unlock;
/* We're about to handle the fault */
if (vmf->address == addr)
ret = VM_FAULT_NOPAGE;
do_set_pte(vmf, page, addr);
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, addr, vmf->pte);
unlock_page(head);
goto next;
continue;
unlock:
unlock_page(head);
skip:
put_page(head);
next:
/* Huge page is mapped? No need to proceed. */
if (pmd_trans_huge(*vmf->pmd))
break;
}
} while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
rcu_read_unlock();
WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
return ret;
}
EXPORT_SYMBOL(filemap_map_pages);

View file

@ -991,38 +991,41 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
static bool __collapse_huge_page_swapin(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
unsigned long haddr, pmd_t *pmd,
int referenced)
{
int swapped_in = 0;
vm_fault_t ret = 0;
struct vm_fault vmf = {
.vma = vma,
.address = address,
.flags = FAULT_FLAG_ALLOW_RETRY,
.pmd = pmd,
.pgoff = linear_page_index(vma, address),
};
unsigned long address, end = haddr + (HPAGE_PMD_NR * PAGE_SIZE);
vmf.pte = pte_offset_map(pmd, address);
for (; vmf.address < address + HPAGE_PMD_NR*PAGE_SIZE;
vmf.pte++, vmf.address += PAGE_SIZE) {
for (address = haddr; address < end; address += PAGE_SIZE) {
struct vm_fault vmf = {
.vma = vma,
.address = address,
.pgoff = linear_page_index(vma, haddr),
.flags = FAULT_FLAG_ALLOW_RETRY,
.pmd = pmd,
};
vmf.pte = pte_offset_map(pmd, address);
vmf.orig_pte = *vmf.pte;
if (!is_swap_pte(vmf.orig_pte))
if (!is_swap_pte(vmf.orig_pte)) {
pte_unmap(vmf.pte);
continue;
}
swapped_in++;
ret = do_swap_page(&vmf);
/* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */
if (ret & VM_FAULT_RETRY) {
mmap_read_lock(mm);
if (hugepage_vma_revalidate(mm, address, &vmf.vma)) {
if (hugepage_vma_revalidate(mm, haddr, &vma)) {
/* vma is no longer available, don't continue to swapin */
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
/* check if the pmd is still valid */
if (mm_find_pmd(mm, address) != pmd) {
if (mm_find_pmd(mm, haddr) != pmd) {
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
@ -1031,11 +1034,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
}
/* pte is unmapped now, we need to map it */
vmf.pte = pte_offset_map(pmd, vmf.address);
}
vmf.pte--;
pte_unmap(vmf.pte);
/* Drain LRU add pagevec to remove extra pin on the swapped in pages */
if (swapped_in)

View file

@ -134,6 +134,18 @@ static inline bool arch_faults_on_old_pte(void)
}
#endif
#ifndef arch_wants_old_prefaulted_pte
static inline bool arch_wants_old_prefaulted_pte(void)
{
/*
* Transitioning a PTE from 'old' to 'young' can be expensive on
* some architectures, even if it's performed in hardware. By
* default, "false" means prefaulted entries will be 'young'.
*/
return false;
}
#endif
static int __init disable_randmaps(char *s)
{
randomize_va_space = 0;
@ -3503,7 +3515,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
if (pte_alloc(vma->vm_mm, vmf->pmd))
return VM_FAULT_OOM;
/* See the comment in pte_alloc_one_map() */
/* See comment in handle_pte_fault() */
if (unlikely(pmd_trans_unstable(vmf->pmd)))
return 0;
@ -3643,66 +3655,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
return ret;
}
/*
* The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
* If we check pmd_trans_unstable() first we will trip the bad_pmd() check
* inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
* returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
*/
static int pmd_devmap_trans_unstable(pmd_t *pmd)
{
return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
}
static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
if (!pmd_none(*vmf->pmd))
goto map_pte;
if (vmf->prealloc_pte) {
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (unlikely(!pmd_none(*vmf->pmd))) {
spin_unlock(vmf->ptl);
goto map_pte;
}
mm_inc_nr_ptes(vma->vm_mm);
pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
spin_unlock(vmf->ptl);
vmf->prealloc_pte = NULL;
} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
return VM_FAULT_OOM;
}
map_pte:
/*
* If a huge pmd materialized under us just retry later. Use
* pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
* pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
* under us and then back to pmd_none, as a result of MADV_DONTNEED
* running immediately after a huge pmd fault in a different thread of
* this mm, in turn leading to a misleading pmd_trans_huge() retval.
* All we have to ensure is that it is a regular pmd that we can walk
* with pte_offset_map() and we can do that through an atomic read in
* C, which is what pmd_trans_unstable() provides.
*/
if (pmd_devmap_trans_unstable(vmf->pmd))
return VM_FAULT_NOPAGE;
/*
* At this point we know that our vmf->pmd points to a page of ptes
* and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
* for the duration of the fault. If a racing MADV_DONTNEED runs and
* we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
* be valid and we will re-check to make sure the vmf->pte isn't
* pte_none() under vmf->ptl protection when we return to
* alloc_set_pte().
*/
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
return 0;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void deposit_prealloc_pte(struct vm_fault *vmf)
{
@ -3717,7 +3669,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
vmf->prealloc_pte = NULL;
}
static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
{
struct vm_area_struct *vma = vmf->vma;
bool write = vmf->flags & FAULT_FLAG_WRITE;
@ -3775,76 +3727,41 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
return ret;
}
#else
static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
{
BUILD_BUG();
return 0;
return VM_FAULT_FALLBACK;
}
#endif
/**
* alloc_set_pte - setup new PTE entry for given page and add reverse page
* mapping. If needed, the function allocates page table or use pre-allocated.
*
* @vmf: fault environment
* @page: page to map
*
* Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on
* return.
*
* Target users are page handler itself and implementations of
* vm_ops->map_pages.
*
* Return: %0 on success, %VM_FAULT_ code in case of error.
*/
vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
{
struct vm_area_struct *vma = vmf->vma;
bool write = vmf->flags & FAULT_FLAG_WRITE;
bool prefault = vmf->address != addr;
pte_t entry;
vm_fault_t ret;
if (pmd_none(*vmf->pmd) && PageTransCompound(page)) {
ret = do_set_pmd(vmf, page);
if (ret != VM_FAULT_FALLBACK)
return ret;
}
if (!vmf->pte) {
ret = pte_alloc_one_map(vmf);
if (ret)
return ret;
}
/* Re-check under ptl */
if (unlikely(!pte_none(*vmf->pte))) {
update_mmu_tlb(vma, vmf->address, vmf->pte);
return VM_FAULT_NOPAGE;
}
flush_icache_page(vma, page);
entry = mk_pte(page, vma->vm_page_prot);
entry = pte_sw_mkyoung(entry);
if (prefault && arch_wants_old_prefaulted_pte())
entry = pte_mkold(entry);
else
entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/* copy-on-write page */
if (write && !(vma->vm_flags & VM_SHARED)) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, vmf->address, false);
page_add_new_anon_rmap(page, vma, addr, false);
lru_cache_add_inactive_or_unevictable(page, vma);
} else {
inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
page_add_file_rmap(page, false);
}
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
/* no need to invalidate: a not-present page won't be cached */
update_mmu_cache(vma, vmf->address, vmf->pte);
return 0;
set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
}
/**
* finish_fault - finish page fault once we have prepared the page to fault
*
@ -3862,12 +3779,12 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
*/
vm_fault_t finish_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page;
vm_fault_t ret = 0;
vm_fault_t ret;
/* Did we COW the page? */
if ((vmf->flags & FAULT_FLAG_WRITE) &&
!(vmf->vma->vm_flags & VM_SHARED))
if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
page = vmf->cow_page;
else
page = vmf->page;
@ -3876,12 +3793,38 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
* check even for read faults because we might have lost our CoWed
* page
*/
if (!(vmf->vma->vm_flags & VM_SHARED))
ret = check_stable_address_space(vmf->vma->vm_mm);
if (!ret)
ret = alloc_set_pte(vmf, page);
if (vmf->pte)
pte_unmap_unlock(vmf->pte, vmf->ptl);
if (!(vma->vm_flags & VM_SHARED)) {
ret = check_stable_address_space(vma->vm_mm);
if (ret)
return ret;
}
if (pmd_none(*vmf->pmd)) {
if (PageTransCompound(page)) {
ret = do_set_pmd(vmf, page);
if (ret != VM_FAULT_FALLBACK)
return ret;
}
if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
return VM_FAULT_OOM;
}
/* See comment in handle_pte_fault() */
if (pmd_devmap_trans_unstable(vmf->pmd))
return 0;
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
ret = 0;
/* Re-check under ptl */
if (likely(pte_none(*vmf->pte)))
do_set_pte(vmf, page, vmf->address);
else
ret = VM_FAULT_NOPAGE;
update_mmu_tlb(vma, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
return ret;
}
@ -3951,13 +3894,12 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
pgoff_t start_pgoff = vmf->pgoff;
pgoff_t end_pgoff;
int off;
vm_fault_t ret = 0;
nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
vmf->address = max(address & mask, vmf->vma->vm_start);
off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
address = max(address & mask, vmf->vma->vm_start);
off = ((vmf->address - address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
start_pgoff -= off;
/*
@ -3965,7 +3907,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
* the vma or nr_pages from start_pgoff, depending what is nearest.
*/
end_pgoff = start_pgoff -
((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
PTRS_PER_PTE - 1;
end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
start_pgoff + nr_pages - 1);
@ -3973,31 +3915,11 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
if (pmd_none(*vmf->pmd)) {
vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
if (!vmf->prealloc_pte)
goto out;
return VM_FAULT_OOM;
smp_wmb(); /* See comment in __pte_alloc() */
}
vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
/* Huge page is mapped? Page fault is solved */
if (pmd_trans_huge(*vmf->pmd)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
/* ->map_pages() haven't done anything useful. Cold page cache? */
if (!vmf->pte)
goto out;
/* check if the page fault is solved */
vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT);
if (!pte_none(*vmf->pte))
ret = VM_FAULT_NOPAGE;
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
vmf->address = address;
vmf->pte = NULL;
return ret;
return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff);
}
static vm_fault_t do_read_fault(struct vm_fault *vmf)
@ -4353,7 +4275,18 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
*/
vmf->pte = NULL;
} else {
/* See comment in pte_alloc_one_map() */
/*
* If a huge pmd materialized under us just retry later. Use
* pmd_trans_unstable() via pmd_devmap_trans_unstable() instead
* of pmd_trans_huge() to ensure the pmd didn't become
* pmd_trans_huge under us and then back to pmd_none, as a
* result of MADV_DONTNEED running immediately after a huge pmd
* fault in a different thread of this mm, in turn leading to a
* misleading pmd_trans_huge() retval. All we have to ensure is
* that it is a regular pmd that we can walk with
* pte_offset_map() and we can do that through an atomic read
* in C, which is what pmd_trans_unstable() provides.
*/
if (pmd_devmap_trans_unstable(vmf->pmd))
return 0;
/*

View file

@ -1668,10 +1668,11 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_fault);
void filemap_map_pages(struct vm_fault *vmf,
vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
BUG();
return 0;
}
EXPORT_SYMBOL(filemap_map_pages);

View file

@ -1520,11 +1520,11 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
{
struct vm_area_struct pvma;
struct page *page;
struct vm_fault vmf;
struct vm_fault vmf = {
.vma = &pvma,
};
shmem_pseudo_vma_init(&pvma, info, index);
vmf.vma = &pvma;
vmf.address = 0;
page = swap_cluster_readahead(swap, gfp, &vmf);
shmem_pseudo_vma_destroy(&pvma);

View file

@ -1951,8 +1951,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
si = swap_info[type];
pte = pte_offset_map(pmd, addr);
do {
struct vm_fault vmf;
if (!is_swap_pte(*pte))
continue;
@ -1968,9 +1966,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
swap_map = &si->swap_map[offset];
page = lookup_swap_cache(entry, vma, addr);
if (!page) {
vmf.vma = vma;
vmf.address = addr;
vmf.pmd = pmd;
struct vm_fault vmf = {
.vma = vma,
.address = addr,
.pmd = pmd,
};
page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
&vmf);
}