mm: handle swap page faults under per-VMA lock

When page fault is handled under per-VMA lock protection, all swap page
faults are retried with mmap_lock because folio_lock_or_retry has to drop
and reacquire mmap_lock if folio could not be immediately locked.  Follow
the same pattern as mmap_lock to drop per-VMA lock when waiting for folio
and retrying once folio is available.

With this obstacle removed, enable do_swap_page to operate under per-VMA
lock protection.  Drivers implementing ops->migrate_to_ram might still
rely on mmap_lock, therefore we have to fall back to mmap_lock in that
particular case.

Note that the only time do_swap_page calls synchronous swap_readpage is
when SWP_SYNCHRONOUS_IO is set, which is only set for
QUEUE_FLAG_SYNCHRONOUS devices: brd, zram and nvdimms (both btt and pmem).
Therefore we don't sleep in this path, and there's no need to drop the
mmap or per-VMA lock.

Link: https://lkml.kernel.org/r/20230630211957.1341547-6-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Tested-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hillf Danton <hdanton@sina.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michel Lespinasse <michel@lespinasse.org>
Cc: Minchan Kim <minchan@google.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Punit Agrawal <punit.agrawal@bytedance.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Suren Baghdasaryan 2023-06-30 14:19:56 -07:00 committed by Andrew Morton
parent fdc724d6aa
commit 1235ccd05b
3 changed files with 31 additions and 15 deletions

View File

@ -729,6 +729,14 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
vma->detached = detached;
}
static inline void release_fault_lock(struct vm_fault *vmf)
{
if (vmf->flags & FAULT_FLAG_VMA_LOCK)
vma_end_read(vmf->vma);
else
mmap_read_unlock(vmf->vma->vm_mm);
}
struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
unsigned long address);
@ -749,6 +757,11 @@ static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
return NULL;
}
static inline void release_fault_lock(struct vm_fault *vmf)
{
mmap_read_unlock(vmf->vma->vm_mm);
}
#endif /* CONFIG_PER_VMA_LOCK */
extern const struct vm_operations_struct vma_dummy_vm_ops;

View File

@ -1671,27 +1671,26 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
* Return values:
* 0 - folio is locked.
* non-zero - folio is not locked.
* mmap_lock has been released (mmap_read_unlock(), unless flags had both
* FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
* which case mmap_lock is still held.
* mmap_lock or per-VMA lock has been released (mmap_read_unlock() or
* vma_end_read()), unless flags had both FAULT_FLAG_ALLOW_RETRY and
* FAULT_FLAG_RETRY_NOWAIT set, in which case the lock is still held.
*
* If neither ALLOW_RETRY nor KILLABLE are set, will always return 0
* with the folio locked and the mmap_lock unperturbed.
* with the folio locked and the mmap_lock/per-VMA lock is left unperturbed.
*/
vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf)
{
struct mm_struct *mm = vmf->vma->vm_mm;
unsigned int flags = vmf->flags;
if (fault_flag_allow_retry_first(flags)) {
/*
* CAUTION! In this case, mmap_lock is not released
* even though return VM_FAULT_RETRY.
* CAUTION! In this case, mmap_lock/per-VMA lock is not
* released even though returning VM_FAULT_RETRY.
*/
if (flags & FAULT_FLAG_RETRY_NOWAIT)
return VM_FAULT_RETRY;
mmap_read_unlock(mm);
release_fault_lock(vmf);
if (flags & FAULT_FLAG_KILLABLE)
folio_wait_locked_killable(folio);
else
@ -1703,7 +1702,7 @@ vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf)
ret = __folio_lock_killable(folio);
if (ret) {
mmap_read_unlock(mm);
release_fault_lock(vmf);
return VM_FAULT_RETRY;
}
} else {

View File

@ -3746,12 +3746,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (!pte_unmap_same(vmf))
goto out;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
ret = VM_FAULT_RETRY;
vma_end_read(vma);
goto out;
}
entry = pte_to_swp_entry(vmf->orig_pte);
if (unlikely(non_swap_entry(entry))) {
if (is_migration_entry(entry)) {
@ -3761,6 +3755,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vmf->page = pfn_swap_entry_to_page(entry);
ret = remove_device_exclusive_entry(vmf);
} else if (is_device_private_entry(entry)) {
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
/*
* migrate_to_ram is not yet ready to operate
* under VMA lock.
*/
vma_end_read(vma);
ret = VM_FAULT_RETRY;
goto out;
}
vmf->page = pfn_swap_entry_to_page(entry);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);