linux-stable/include/linux/userfaultfd_k.h
Peter Xu c1991e0705 hugetlb/userfaultfd: forbid huge pmd sharing when uffd enabled
Huge pmd sharing could bring problem to userfaultfd.  The thing is that
userfaultfd is running its logic based on the special bits on page table
entries, however the huge pmd sharing could potentially share page table
entries for different address ranges.  That could cause issues on
either:

 - When sharing huge pmd page tables for an uffd write protected range,
   the newly mapped huge pmd range will also be write protected
   unexpectedly, or,

 - When we try to write protect a range of huge pmd shared range, we'll
   first do huge_pmd_unshare() in hugetlb_change_protection(), however
   that also means the UFFDIO_WRITEPROTECT could be silently skipped for
   the shared region, which could lead to data loss.

While at it, a few other things are done altogether:

 - Move want_pmd_share() from mm/hugetlb.c into linux/hugetlb.h, because
   that's definitely something that arch code would like to use too

 - ARM64 currently directly check against
   CONFIG_ARCH_WANT_HUGE_PMD_SHARE when trying to share huge pmd. Switch
   to the want_pmd_share() helper.

 - Move vma_shareable() from huge_pmd_share() into want_pmd_share().

[peterx@redhat.com: fix build with !ARCH_WANT_HUGE_PMD_SHARE]
  Link: https://lkml.kernel.org/r/20210310185359.88297-1-peterx@redhat.com

Link: https://lkml.kernel.org/r/20210218231202.15426-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Cc: Adam Ruprecht <ruprecht@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Cannon Matthews <cannonmatthews@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Michal Koutn" <mkoutny@suse.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oliver Upton <oupton@google.com>
Cc: Shaohua Li <shli@fb.com>
Cc: Shawn Anastasio <shawn@anastas.io>
Cc: Steven Price <steven.price@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-05-05 11:27:20 -07:00

196 lines
4.8 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
/*
* include/linux/userfaultfd_k.h
*
* Copyright (C) 2015 Red Hat, Inc.
*
*/
#ifndef _LINUX_USERFAULTFD_K_H
#define _LINUX_USERFAULTFD_K_H
#ifdef CONFIG_USERFAULTFD
#include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <asm-generic/pgtable_uffd.h>
/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
* new flags, since they might collide with O_* ones. We want
* to re-use O_* flags that couldn't possibly have a meaning
* from userfaultfd, in order to leave a free define-space for
* shared O_* flags.
*/
#define UFFD_CLOEXEC O_CLOEXEC
#define UFFD_NONBLOCK O_NONBLOCK
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
extern int sysctl_unprivileged_userfaultfd;
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long src_start, unsigned long len,
bool *mmap_changing, __u64 mode);
extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
unsigned long dst_start,
unsigned long len,
bool *mmap_changing);
extern int mwriteprotect_range(struct mm_struct *dst_mm,
unsigned long start, unsigned long len,
bool enable_wp, bool *mmap_changing);
/* mm helpers */
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx vm_ctx)
{
return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
}
/*
* Never enable huge pmd sharing on uffd-wp registered vmas, because uffd-wp
* protect information is per pgtable entry.
*/
static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
{
return vma->vm_flags & VM_UFFD_WP;
}
static inline bool userfaultfd_missing(struct vm_area_struct *vma)
{
return vma->vm_flags & VM_UFFD_MISSING;
}
static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
return vma->vm_flags & VM_UFFD_WP;
}
static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
pte_t pte)
{
return userfaultfd_wp(vma) && pte_uffd_wp(pte);
}
static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
pmd_t pmd)
{
return userfaultfd_wp(vma) && pmd_uffd_wp(pmd);
}
static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP);
}
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
extern void mremap_userfaultfd_prep(struct vm_area_struct *,
struct vm_userfaultfd_ctx *);
extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
unsigned long from, unsigned long to,
unsigned long len);
extern bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end);
extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct list_head *uf);
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf);
#else /* CONFIG_USERFAULTFD */
/* mm helpers */
static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
unsigned long reason)
{
return VM_FAULT_SIGBUS;
}
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx vm_ctx)
{
return true;
}
static inline bool userfaultfd_missing(struct vm_area_struct *vma)
{
return false;
}
static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
return false;
}
static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
pte_t pte)
{
return false;
}
static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma,
pmd_t pmd)
{
return false;
}
static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
return false;
}
static inline int dup_userfaultfd(struct vm_area_struct *vma,
struct list_head *l)
{
return 0;
}
static inline void dup_userfaultfd_complete(struct list_head *l)
{
}
static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx *ctx)
{
}
static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
unsigned long from,
unsigned long to,
unsigned long len)
{
}
static inline bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end)
{
return true;
}
static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct list_head *uf)
{
return 0;
}
static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf)
{
}
#endif /* CONFIG_USERFAULTFD */
#endif /* _LINUX_USERFAULTFD_K_H */