Add support for new Linear Address Masking CPU feature. This is similar

to ARM's Top Byte Ignore and allows userspace to store metadata in some
 bits of pointers without masking it out before use.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEV76QKkVc4xCGURexaDWVMHDJkrAFAmRK/WIACgkQaDWVMHDJ
 krAL+RAAw33EhsWyYVkeAtYmYBKkGvlgeSDULtfJKe5bynJBTHkGKfM6RE9MSJIt
 5fHWaConGh8HNpy0Us1sDvd/aWcWRm5h7ZcCVD+R4qrgh/vc7ULzM+elXe5jzr4W
 cyuTckF2eW6SVrYg6fH5q+6Uy/moDtrdkLRvwRBf+AYeepB8gvSSH5XixKDNiVBE
 pjNy1xXVZQokqD4tjsFelmLttyacR5OabiE/aeVNoFYf9yTwfnN8N3T6kwuOoS4l
 Lp6NA+/0ux+oBlR+Is+JJG8Mxrjvz96yJGZYdR2YP5k3bMQtHAAjuq2w+GgqZm5i
 j3/E6KQepEGaCfC+bHl68xy/kKx8ik+jMCEcBalCC25J3uxbLz41g6K3aI890wJn
 +5ZtfcmoDUk9pnUyLxR8t+UjOSBFAcRSUE+FTjUH1qEGsMPK++9a4iLXz5vYVK1+
 +YCt1u5LNJbkDxE8xVX3F5jkXh0G01SJsuUVAOqHSNfqSNmohFK8/omqhVRrRqoK
 A7cYLtnOGiUXLnvjrwSxPNOzRrG+GAwqaw8gwOTaYogETWbTY8qsSCEVl204uYwd
 m8io9rk2ZXUdDuha56xpBbPE0JHL9hJ2eKCuPkfvRgJT9YFyTh+e0UdX20k+nDjc
 ang1S350o/Y0sus6rij1qS8AuxJIjHucG0GdgpZk3KUbcxoRLhI=
 =qitk
 -----END PGP SIGNATURE-----

Merge tag 'x86_mm_for_6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 LAM (Linear Address Masking) support from Dave Hansen:
 "Add support for the new Linear Address Masking CPU feature.

  This is similar to ARM's Top Byte Ignore and allows userspace to store
  metadata in some bits of pointers without masking it out before use"

* tag 'x86_mm_for_6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/iommu/sva: Do not allow to set FORCE_TAGGED_SVA bit from outside
  x86/mm/iommu/sva: Fix error code for LAM enabling failure due to SVA
  selftests/x86/lam: Add test cases for LAM vs thread creation
  selftests/x86/lam: Add ARCH_FORCE_TAGGED_SVA test cases for linear-address masking
  selftests/x86/lam: Add inherit test cases for linear-address masking
  selftests/x86/lam: Add io_uring test cases for linear-address masking
  selftests/x86/lam: Add mmap and SYSCALL test cases for linear-address masking
  selftests/x86/lam: Add malloc and tag-bits test cases for linear-address masking
  x86/mm/iommu/sva: Make LAM and SVA mutually exclusive
  iommu/sva: Replace pasid_valid() helper with mm_valid_pasid()
  mm: Expose untagging mask in /proc/$PID/status
  x86/mm: Provide arch_prctl() interface for LAM
  x86/mm: Reduce untagged_addr() overhead for systems without LAM
  x86/uaccess: Provide untagged_addr() and remove tags before address check
  mm: Introduce untagged_addr_remote()
  x86/mm: Handle LAM on context switch
  x86: CPUID and CR3/CR4 flags for Linear Address Masking
  x86: Allow atomic MM_CONTEXT flags setting
  x86/mm: Rework address range check in get_user() and put_user()
This commit is contained in:
Linus Torvalds 2023-04-28 09:43:49 -07:00
commit 22b8cc3e78
35 changed files with 1701 additions and 149 deletions

View File

@ -288,6 +288,12 @@ void post_ttbr_update_workaround(void);
unsigned long arm64_mm_context_get(struct mm_struct *mm);
void arm64_mm_context_put(struct mm_struct *mm);
#define mm_untag_mask mm_untag_mask
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
return -1UL >> 8;
}
#include <asm-generic/mmu_context.h>
#endif /* !__ASSEMBLY__ */

View File

@ -185,6 +185,12 @@ static inline void finish_arch_post_lock_switch(void)
}
}
#define mm_untag_mask mm_untag_mask
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
return -1UL >> adi_nbits();
}
#include <asm-generic/mmu_context.h>
#endif /* !(__ASSEMBLY__) */

View File

@ -8,8 +8,10 @@
#include <linux/compiler.h>
#include <linux/string.h>
#include <linux/mm_types.h>
#include <asm/asi.h>
#include <asm/spitfire.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm-generic/access_ok.h>

View File

@ -2290,6 +2290,17 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
If unsure, leave at the default value.
config ADDRESS_MASKING
bool "Linear Address Masking support"
depends on X86_64
help
Linear Address Masking (LAM) modifies the checking that is applied
to 64-bit linear addresses, allowing software to use of the
untranslated address bits for metadata.
The capability can be used for efficient address sanitizers (ASAN)
implementation and for optimizations in JITs.
config HOTPLUG_CPU
def_bool y
depends on SMP

View File

@ -317,7 +317,7 @@ static struct vm_area_struct gate_vma __ro_after_init = {
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
#ifdef CONFIG_COMPAT
if (!mm || !(mm->context.flags & MM_CONTEXT_HAS_VSYSCALL))
if (!mm || !test_bit(MM_CONTEXT_HAS_VSYSCALL, &mm->context.flags))
return NULL;
#endif
if (vsyscall_mode == NONE)

View File

@ -321,6 +321,7 @@
#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */

View File

@ -75,6 +75,12 @@
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
#endif
#ifdef CONFIG_ADDRESS_MASKING
# define DISABLE_LAM 0
#else
# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
#endif
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@ -115,7 +121,7 @@
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING)
#define DISABLED_MASK12 0
#define DISABLED_MASK12 (DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0

View File

@ -9,9 +9,13 @@
#include <linux/bits.h>
/* Uprobes on this MM assume 32-bit code */
#define MM_CONTEXT_UPROBE_IA32 BIT(0)
#define MM_CONTEXT_UPROBE_IA32 0
/* vsyscall page is accessible on this MM */
#define MM_CONTEXT_HAS_VSYSCALL BIT(1)
#define MM_CONTEXT_HAS_VSYSCALL 1
/* Do not allow changing LAM mode */
#define MM_CONTEXT_LOCK_LAM 2
/* Allow LAM and SVA coexisting */
#define MM_CONTEXT_FORCE_TAGGED_SVA 3
/*
* x86 has arch-specific MMU state beyond what lives in mm_struct.
@ -39,7 +43,15 @@ typedef struct {
#endif
#ifdef CONFIG_X86_64
unsigned short flags;
unsigned long flags;
#endif
#ifdef CONFIG_ADDRESS_MASKING
/* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
unsigned long lam_cr3_mask;
/* Significant bits of the virtual address. Excludes tag bits. */
u64 untag_mask;
#endif
struct mutex lock;

View File

@ -85,6 +85,51 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
}
#endif
#ifdef CONFIG_ADDRESS_MASKING
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
return mm->context.lam_cr3_mask;
}
static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
mm->context.untag_mask = oldmm->context.untag_mask;
}
#define mm_untag_mask mm_untag_mask
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
return mm->context.untag_mask;
}
static inline void mm_reset_untag_mask(struct mm_struct *mm)
{
mm->context.untag_mask = -1UL;
}
#define arch_pgtable_dma_compat arch_pgtable_dma_compat
static inline bool arch_pgtable_dma_compat(struct mm_struct *mm)
{
return !mm_lam_cr3_mask(mm) ||
test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags);
}
#else
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
return 0;
}
static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
}
static inline void mm_reset_untag_mask(struct mm_struct *mm)
{
}
#endif
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
@ -109,6 +154,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
mm_reset_untag_mask(mm);
init_new_context_ldt(mm);
return 0;
}
@ -162,6 +208,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
arch_dup_pkeys(oldmm, mm);
paravirt_enter_mmap(mm);
dup_lam(oldmm, mm);
return ldt_dup_context(oldmm, mm);
}
@ -175,7 +222,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
static inline bool is_64bit_mm(struct mm_struct *mm)
{
return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
!(mm->context.flags & MM_CONTEXT_UPROBE_IA32);
!test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)

View File

@ -28,6 +28,8 @@
* On systems with SME, one bit (in a variable position!) is stolen to indicate
* that the top-level paging structure is encrypted.
*
* On systemms with LAM, bits 61 and 62 are used to indicate LAM mode.
*
* All of the remaining bits indicate the physical address of the top-level
* paging structure.
*

View File

@ -2,7 +2,7 @@
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/processor.h>
@ -12,6 +12,7 @@
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
#include <asm/pgtable.h>
void __flush_tlb_all(void);
@ -53,6 +54,15 @@ static inline void cr4_clear_bits(unsigned long mask)
local_irq_restore(flags);
}
#ifdef CONFIG_ADDRESS_MASKING
DECLARE_PER_CPU(u64, tlbstate_untag_mask);
static inline u64 current_untag_mask(void)
{
return this_cpu_read(tlbstate_untag_mask);
}
#endif
#ifndef MODULE
/*
* 6 because 6 should be plenty and struct tlb_state will fit in two cache
@ -101,6 +111,16 @@ struct tlb_state {
*/
bool invalidate_other;
#ifdef CONFIG_ADDRESS_MASKING
/*
* Active LAM mode.
*
* X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
* disabled.
*/
u8 lam;
#endif
/*
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
* the corresponding user PCID needs a flush next time we
@ -357,6 +377,32 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
}
#define huge_pmd_needs_flush huge_pmd_needs_flush
#ifdef CONFIG_ADDRESS_MASKING
static inline u64 tlbstate_lam_cr3_mask(void)
{
u64 lam = this_cpu_read(cpu_tlbstate.lam);
return lam << X86_CR3_LAM_U57_BIT;
}
static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
{
this_cpu_write(cpu_tlbstate.lam,
mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask);
}
#else
static inline u64 tlbstate_lam_cr3_mask(void)
{
return 0;
}
static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
{
}
#endif
#endif /* !MODULE */
static inline void __native_tlb_flush_global(unsigned long cr4)

View File

@ -7,11 +7,14 @@
#include <linux/compiler.h>
#include <linux/instrumented.h>
#include <linux/kasan-checks.h>
#include <linux/mm_types.h>
#include <linux/string.h>
#include <linux/mmap_lock.h>
#include <asm/asm.h>
#include <asm/page.h>
#include <asm/smap.h>
#include <asm/extable.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline bool pagefault_disabled(void);
@ -21,6 +24,57 @@ static inline bool pagefault_disabled(void);
# define WARN_ON_IN_IRQ()
#endif
#ifdef CONFIG_ADDRESS_MASKING
/*
* Mask out tag bits from the address.
*
* Magic with the 'sign' allows to untag userspace pointer without any branches
* while leaving kernel addresses intact.
*/
static inline unsigned long __untagged_addr(unsigned long addr)
{
long sign;
/*
* Refer tlbstate_untag_mask directly to avoid RIP-relative relocation
* in alternative instructions. The relocation gets wrong when gets
* copied to the target place.
*/
asm (ALTERNATIVE("",
"sar $63, %[sign]\n\t" /* user_ptr ? 0 : -1UL */
"or %%gs:tlbstate_untag_mask, %[sign]\n\t"
"and %[sign], %[addr]\n\t", X86_FEATURE_LAM)
: [addr] "+r" (addr), [sign] "=r" (sign)
: "m" (tlbstate_untag_mask), "[sign]" (addr));
return addr;
}
#define untagged_addr(addr) ({ \
unsigned long __addr = (__force unsigned long)(addr); \
(__force __typeof__(addr))__untagged_addr(__addr); \
})
static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
unsigned long addr)
{
long sign = addr >> 63;
mmap_assert_locked(mm);
addr &= (mm)->context.untag_mask | sign;
return addr;
}
#define untagged_addr_remote(mm, addr) ({ \
unsigned long __addr = (__force unsigned long)(addr); \
(__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
})
#else
#define untagged_addr(addr) (addr)
#endif
/**
* access_ok - Checks if a user space pointer is valid
* @addr: User space pointer to start of block to check
@ -38,10 +92,10 @@ static inline bool pagefault_disabled(void);
* Return: true (nonzero) if the memory block may be valid, false (zero)
* if it is definitely invalid.
*/
#define access_ok(addr, size) \
#define access_ok(addr, size) \
({ \
WARN_ON_IN_IRQ(); \
likely(__access_ok(addr, size)); \
likely(__access_ok(untagged_addr(addr), size)); \
})
#include <asm-generic/access_ok.h>

View File

@ -23,4 +23,9 @@
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
#define ARCH_GET_UNTAG_MASK 0x4001
#define ARCH_ENABLE_TAGGED_ADDR 0x4002
#define ARCH_GET_MAX_TAG_BITS 0x4003
#define ARCH_FORCE_TAGGED_SVA 0x4004
#endif /* _ASM_X86_PRCTL_H */

View File

@ -82,6 +82,10 @@
#define X86_CR3_PCID_BITS 12
#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
#define X86_CR3_LAM_U57_BIT 61 /* Activate LAM for userspace, 62:57 bits masked */
#define X86_CR3_LAM_U57 _BITULL(X86_CR3_LAM_U57_BIT)
#define X86_CR3_LAM_U48_BIT 62 /* Activate LAM for userspace, 62:48 bits masked */
#define X86_CR3_LAM_U48 _BITULL(X86_CR3_LAM_U48_BIT)
#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
@ -132,6 +136,8 @@
#define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT)
#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */
#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT)
#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */
#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT)
/*
* x86-64 Task Priority Register, CR8

View File

@ -48,6 +48,7 @@
#include <asm/frame.h>
#include <asm/unwind.h>
#include <asm/tdx.h>
#include <asm/mmu_context.h>
#include "process.h"
@ -162,6 +163,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
if (p->mm && (clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM)
set_bit(MM_CONTEXT_LOCK_LAM, &p->mm->context.flags);
#else
p->thread.sp0 = (unsigned long) (childregs + 1);
savesegment(gs, p->thread.gs);
@ -368,6 +372,8 @@ void arch_setup_new_exec(void)
task_clear_spec_ssb_noexec(current);
speculation_ctrl_update(read_thread_flags());
}
mm_reset_untag_mask(current->mm);
}
#ifdef CONFIG_X86_IOPL_IOPERM

View File

@ -671,7 +671,7 @@ void set_personality_64bit(void)
task_pt_regs(current)->orig_ax = __NR_execve;
current_thread_info()->status &= ~TS_COMPAT;
if (current->mm)
current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL;
__set_bit(MM_CONTEXT_HAS_VSYSCALL, &current->mm->context.flags);
/* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way,
@ -708,7 +708,7 @@ static void __set_personality_ia32(void)
* uprobes applied to this MM need to know this and
* cannot use user_64bit_mode() at that time.
*/
current->mm->context.flags = MM_CONTEXT_UPROBE_IA32;
__set_bit(MM_CONTEXT_UPROBE_IA32, &current->mm->context.flags);
}
current->personality |= force_personality32;
@ -743,6 +743,52 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
}
#endif
#ifdef CONFIG_ADDRESS_MASKING
#define LAM_U57_BITS 6
static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
{
if (!cpu_feature_enabled(X86_FEATURE_LAM))
return -ENODEV;
/* PTRACE_ARCH_PRCTL */
if (current->mm != mm)
return -EINVAL;
if (mm_valid_pasid(mm) &&
!test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags))
return -EINVAL;
if (mmap_write_lock_killable(mm))
return -EINTR;
if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) {
mmap_write_unlock(mm);
return -EBUSY;
}
if (!nr_bits) {
mmap_write_unlock(mm);
return -EINVAL;
} else if (nr_bits <= LAM_U57_BITS) {
mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
mm->context.untag_mask = ~GENMASK(62, 57);
} else {
mmap_write_unlock(mm);
return -EINVAL;
}
write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
set_tlbstate_lam_mode(mm);
set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
mmap_write_unlock(mm);
return 0;
}
#endif
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
int ret = 0;
@ -830,7 +876,23 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
case ARCH_MAP_VDSO_64:
return prctl_map_vdso(&vdso_image_64, arg2);
#endif
#ifdef CONFIG_ADDRESS_MASKING
case ARCH_GET_UNTAG_MASK:
return put_user(task->mm->context.untag_mask,
(unsigned long __user *)arg2);
case ARCH_ENABLE_TAGGED_ADDR:
return prctl_enable_tagged_addr(task->mm, arg2);
case ARCH_FORCE_TAGGED_SVA:
if (current != task)
return -EINVAL;
set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags);
return 0;
case ARCH_GET_MAX_TAG_BITS:
if (!cpu_feature_enabled(X86_FEATURE_LAM))
return put_user(0, (unsigned long __user *)arg2);
else
return put_user(LAM_U57_BITS, (unsigned long __user *)arg2);
#endif
default:
ret = -EINVAL;
break;

View File

@ -671,15 +671,15 @@ static bool try_fixup_enqcmd_gp(void)
if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
return false;
pasid = current->mm->pasid;
/*
* If the mm has not been allocated a
* PASID, the #GP can not be fixed up.
*/
if (!pasid_valid(pasid))
if (!mm_valid_pasid(current->mm))
return false;
pasid = current->mm->pasid;
/*
* Did this thread already have its PASID activated?
* If so, the #GP must be from something else.

View File

@ -37,22 +37,22 @@
#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
#ifdef CONFIG_X86_5LEVEL
#define LOAD_TASK_SIZE_MINUS_N(n) \
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
__stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
#else
#define LOAD_TASK_SIZE_MINUS_N(n) \
mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
#endif
.macro check_range size:req
.if IS_ENABLED(CONFIG_X86_64)
mov %rax, %rdx
sar $63, %rdx
or %rdx, %rax
.else
cmp $TASK_SIZE_MAX-\size+1, %eax
jae .Lbad_get_user
sbb %edx, %edx /* array_index_mask_nospec() */
and %edx, %eax
.endif
.endm
.text
SYM_FUNC_START(__get_user_1)
LOAD_TASK_SIZE_MINUS_N(0)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
check_range size=1
ASM_STAC
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
@ -62,11 +62,7 @@ SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2)
LOAD_TASK_SIZE_MINUS_N(1)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
check_range size=2
ASM_STAC
2: movzwl (%_ASM_AX),%edx
xor %eax,%eax
@ -76,11 +72,7 @@ SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4)
LOAD_TASK_SIZE_MINUS_N(3)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
check_range size=4
ASM_STAC
3: movl (%_ASM_AX),%edx
xor %eax,%eax
@ -90,30 +82,17 @@ SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
SYM_FUNC_START(__get_user_8)
check_range size=8
ASM_STAC
#ifdef CONFIG_X86_64
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movq (%_ASM_AX),%rdx
xor %eax,%eax
ASM_CLAC
RET
#else
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user_8
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movl (%_ASM_AX),%edx
5: movl 4(%_ASM_AX),%ecx
#endif
xor %eax,%eax
ASM_CLAC
RET
#endif
SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
@ -166,7 +145,7 @@ EXPORT_SYMBOL(__get_user_nocheck_8)
SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
ASM_CLAC
bad_get_user:
.Lbad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
RET
@ -184,23 +163,23 @@ SYM_CODE_END(.Lbad_get_user_8_clac)
#endif
/* get_user */
_ASM_EXTABLE_UA(1b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(3b, .Lbad_get_user_clac)
_ASM_EXTABLE(1b, .Lbad_get_user_clac)
_ASM_EXTABLE(2b, .Lbad_get_user_clac)
_ASM_EXTABLE(3b, .Lbad_get_user_clac)
#ifdef CONFIG_X86_64
_ASM_EXTABLE_UA(4b, .Lbad_get_user_clac)
_ASM_EXTABLE(4b, .Lbad_get_user_clac)
#else
_ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac)
_ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac)
_ASM_EXTABLE(4b, .Lbad_get_user_8_clac)
_ASM_EXTABLE(5b, .Lbad_get_user_8_clac)
#endif
/* __get_user */
_ASM_EXTABLE_UA(6b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(7b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(8b, .Lbad_get_user_clac)
_ASM_EXTABLE(6b, .Lbad_get_user_clac)
_ASM_EXTABLE(7b, .Lbad_get_user_clac)
_ASM_EXTABLE(8b, .Lbad_get_user_clac)
#ifdef CONFIG_X86_64
_ASM_EXTABLE_UA(9b, .Lbad_get_user_clac)
_ASM_EXTABLE(9b, .Lbad_get_user_clac)
#else
_ASM_EXTABLE_UA(9b, .Lbad_get_user_8_clac)
_ASM_EXTABLE_UA(10b, .Lbad_get_user_8_clac)
_ASM_EXTABLE(9b, .Lbad_get_user_8_clac)
_ASM_EXTABLE(10b, .Lbad_get_user_8_clac)
#endif

View File

@ -33,20 +33,20 @@
* as they get called from within inline assembly.
*/
#ifdef CONFIG_X86_5LEVEL
#define LOAD_TASK_SIZE_MINUS_N(n) \
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \
__stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57
#else
#define LOAD_TASK_SIZE_MINUS_N(n) \
mov $(TASK_SIZE_MAX - (n)),%_ASM_BX
#endif
.macro check_range size:req
.if IS_ENABLED(CONFIG_X86_64)
mov %rcx, %rbx
sar $63, %rbx
or %rbx, %rcx
.else
cmp $TASK_SIZE_MAX-\size+1, %ecx
jae .Lbad_put_user
.endif
.endm
.text
SYM_FUNC_START(__put_user_1)
LOAD_TASK_SIZE_MINUS_N(0)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
check_range size=1
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %ecx,%ecx
@ -66,9 +66,7 @@ SYM_FUNC_END(__put_user_nocheck_1)
EXPORT_SYMBOL(__put_user_nocheck_1)
SYM_FUNC_START(__put_user_2)
LOAD_TASK_SIZE_MINUS_N(1)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
check_range size=2
ASM_STAC
3: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
@ -88,9 +86,7 @@ SYM_FUNC_END(__put_user_nocheck_2)
EXPORT_SYMBOL(__put_user_nocheck_2)
SYM_FUNC_START(__put_user_4)
LOAD_TASK_SIZE_MINUS_N(3)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
check_range size=4
ASM_STAC
5: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
@ -110,9 +106,7 @@ SYM_FUNC_END(__put_user_nocheck_4)
EXPORT_SYMBOL(__put_user_nocheck_4)
SYM_FUNC_START(__put_user_8)
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
check_range size=8
ASM_STAC
7: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
@ -144,15 +138,15 @@ SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
RET
SYM_CODE_END(.Lbad_put_user_clac)
_ASM_EXTABLE_UA(1b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(3b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(4b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(5b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(6b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(7b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(9b, .Lbad_put_user_clac)
_ASM_EXTABLE(1b, .Lbad_put_user_clac)
_ASM_EXTABLE(2b, .Lbad_put_user_clac)
_ASM_EXTABLE(3b, .Lbad_put_user_clac)
_ASM_EXTABLE(4b, .Lbad_put_user_clac)
_ASM_EXTABLE(5b, .Lbad_put_user_clac)
_ASM_EXTABLE(6b, .Lbad_put_user_clac)
_ASM_EXTABLE(7b, .Lbad_put_user_clac)
_ASM_EXTABLE(9b, .Lbad_put_user_clac)
#ifdef CONFIG_X86_32
_ASM_EXTABLE_UA(8b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(10b, .Lbad_put_user_clac)
_ASM_EXTABLE(8b, .Lbad_put_user_clac)
_ASM_EXTABLE(10b, .Lbad_put_user_clac)
#endif

View File

@ -1048,6 +1048,11 @@ __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
#ifdef CONFIG_ADDRESS_MASKING
DEFINE_PER_CPU(u64, tlbstate_untag_mask);
EXPORT_PER_CPU_SYMBOL(tlbstate_untag_mask);
#endif
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
/* entry 0 MUST be WB (hardwired to speed up translations) */

View File

@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid)
return ret;
}
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
{
unsigned long cr3 = __sme_pa(pgd) | lam;
if (static_cpu_has(X86_FEATURE_PCID)) {
return __sme_pa(pgd) | kern_pcid(asid);
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
cr3 |= kern_pcid(asid);
} else {
VM_WARN_ON_ONCE(asid != 0);
return __sme_pa(pgd);
}
return cr3;
}
static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
unsigned long lam)
{
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
/*
* Use boot_cpu_has() instead of this_cpu_has() as this function
* might be called during early boot. This should work even after
* boot because all CPU's the have same capabilities:
*/
VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
return build_cr3(pgd, asid, lam) | CR3_NOFLUSH;
}
/*
@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid)
(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}
static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
bool need_flush)
{
unsigned long new_mm_cr3;
if (need_flush) {
invalidate_user_asid(new_asid);
new_mm_cr3 = build_cr3(pgdir, new_asid);
new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
} else {
new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
}
/*
@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
{
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
unsigned long new_lam = mm_lam_cr3_mask(next);
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
u64 next_tlb_gen;
@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* isn't free.
*/
#ifdef CONFIG_DEBUG_VM
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
tlbstate_lam_cr3_mask()))) {
/*
* If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace.
@ -552,9 +559,15 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* instruction.
*/
if (real_prev == next) {
/* Not actually switching mm's */
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
/*
* If this races with another thread that enables lam, 'new_lam'
* might not match tlbstate_lam_cr3_mask().
*/
/*
* Even in lazy TLB mode, the CPU should stay set in the
* mm_cpumask. The TLB shootdown code can figure out from
@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
barrier();
}
set_tlbstate_lam_mode(next);
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, true);
load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
load_new_mm_cr3(next->pgd, new_asid, false);
load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void)
/* Assert that CR3 already references the right mm. */
WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
/* LAM expected to be disabled */
WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
WARN_ON(mm_lam_cr3_mask(mm));
/*
* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
* doesn't work like other CR4 bits because it can only be set from
@ -699,8 +717,8 @@ void initialize_tlbstate_and_flush(void)
WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
!(cr4_read_shadow() & X86_CR4_PCIDE));
/* Force ASID 0 and force a TLB flush. */
write_cr3(build_cr3(mm->pgd, 0));
/* Disable LAM, force ASID 0 and force a TLB flush. */
write_cr3(build_cr3(mm->pgd, 0, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
@ -708,6 +726,7 @@ void initialize_tlbstate_and_flush(void)
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
set_tlbstate_lam_mode(mm);
for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
*/
unsigned long __get_current_cr3_fast(void)
{
unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
this_cpu_read(cpu_tlbstate.loaded_mm_asid));
unsigned long cr3 =
build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
this_cpu_read(cpu_tlbstate.loaded_mm_asid),
tlbstate_lam_cr3_mask());
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());

View File

@ -2,6 +2,7 @@
/*
* Helpers for IOMMU drivers implementing SVA
*/
#include <linux/mmu_context.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/iommu.h>
@ -32,16 +33,19 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
min == 0 || max < min)
return -EINVAL;
if (!arch_pgtable_dma_compat(mm))
return -EBUSY;
mutex_lock(&iommu_sva_lock);
/* Is a PASID already associated with this mm? */
if (pasid_valid(mm->pasid)) {
if (mm_valid_pasid(mm)) {
if (mm->pasid < min || mm->pasid >= max)
ret = -EOVERFLOW;
goto out;
}
pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
if (!pasid_valid(pasid))
if (pasid == INVALID_IOASID)
ret = -ENOMEM;
else
mm_pasid_set(mm, pasid);

View File

@ -580,7 +580,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
goto done;
}
vaddr = untagged_addr(vaddr);
vaddr = untagged_addr_remote(mm, vaddr);
retry:
vma = vma_lookup(mm, vaddr);

View File

@ -91,6 +91,7 @@
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
#include <linux/kthread.h>
#include <linux/mmu_context.h>
#include <asm/processor.h>
#include "internal.h"
@ -425,6 +426,11 @@ static inline void task_thp_status(struct seq_file *m, struct mm_struct *mm)
seq_printf(m, "THP_enabled:\t%d\n", thp_enabled);
}
static inline void task_untag_mask(struct seq_file *m, struct mm_struct *mm)
{
seq_printf(m, "untag_mask:\t%#lx\n", mm_untag_mask(mm));
}
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@ -440,6 +446,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
task_mem(m, mm);
task_core_dumping(m, task);
task_thp_status(m, mm);
task_untag_mask(m, mm);
mmput(mm);
}
task_sig(m, task);

View File

@ -1688,8 +1688,13 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* watch out for wraparound */
start_vaddr = end_vaddr;
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) {
ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT);
mmap_read_unlock(mm);
}
/* Ensure the address is inside the task */
if (start_vaddr > mm->task_size)

View File

@ -40,10 +40,6 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
int ioasid_set_data(ioasid_t ioasid, void *data);
static inline bool pasid_valid(ioasid_t ioasid)
{
return ioasid != INVALID_IOASID;
}
#else /* !CONFIG_IOASID */
static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
@ -74,10 +70,5 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data)
return -ENOTSUPP;
}
static inline bool pasid_valid(ioasid_t ioasid)
{
return false;
}
#endif /* CONFIG_IOASID */
#endif /* __LINUX_IOASID_H */

View File

@ -98,17 +98,6 @@ extern int mmap_rnd_compat_bits __read_mostly;
#include <asm/page.h>
#include <asm/processor.h>
/*
* Architectures that support memory tagging (assigning tags to memory regions,
* embedding these tags into addresses that point to these memory regions, and
* checking that the memory and the pointer tags match on memory accesses)
* redefine this macro to strip tags from pointers.
* It's defined as noop for architectures that don't support memory tagging.
*/
#ifndef untagged_addr
#define untagged_addr(addr) (addr)
#endif
#ifndef __pa_symbol
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif

View File

@ -28,4 +28,18 @@ static inline void leave_mm(int cpu) { }
# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
#endif
#ifndef mm_untag_mask
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
return -1UL;
}
#endif
#ifndef arch_pgtable_dma_compat
static inline bool arch_pgtable_dma_compat(struct mm_struct *mm)
{
return true;
}
#endif
#endif

View File

@ -485,6 +485,11 @@ static inline void mm_pasid_init(struct mm_struct *mm)
mm->pasid = INVALID_IOASID;
}
static inline bool mm_valid_pasid(struct mm_struct *mm)
{
return mm->pasid != INVALID_IOASID;
}
/* Associate a PASID with an mm_struct: */
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
{
@ -493,13 +498,14 @@ static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
static inline void mm_pasid_drop(struct mm_struct *mm)
{
if (pasid_valid(mm->pasid)) {
if (mm_valid_pasid(mm)) {
ioasid_free(mm->pasid);
mm->pasid = INVALID_IOASID;
}
}
#else
static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif

View File

@ -10,6 +10,28 @@
#include <asm/uaccess.h>
/*
* Architectures that support memory tagging (assigning tags to memory regions,
* embedding these tags into addresses that point to these memory regions, and
* checking that the memory and the pointer tags match on memory accesses)
* redefine this macro to strip tags from pointers.
*
* Passing down mm_struct allows to define untagging rules on per-process
* basis.
*
* It's defined as noop for architectures that don't support memory tagging.
*/
#ifndef untagged_addr
#define untagged_addr(addr) (addr)
#endif
#ifndef untagged_addr_remote
#define untagged_addr_remote(mm, addr) ({ \
mmap_assert_locked(mm); \
untagged_addr(addr); \
})
#endif
/*
* Architectures should provide two primitives (raw_copy_{to,from}_user())
* and get rid of their private instances of copy_{to,from}_user() and

View File

@ -1085,7 +1085,7 @@ static long __get_user_pages(struct mm_struct *mm,
if (!nr_pages)
return 0;
start = untagged_addr(start);
start = untagged_addr_remote(mm, start);
VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
@ -1259,7 +1259,7 @@ int fixup_user_fault(struct mm_struct *mm,
struct vm_area_struct *vma;
vm_fault_t ret;
address = untagged_addr(address);
address = untagged_addr_remote(mm, address);
if (unlocked)
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

View File

@ -1390,8 +1390,6 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
size_t len;
struct blk_plug plug;
start = untagged_addr(start);
if (!madvise_behavior_valid(behavior))
return -EINVAL;
@ -1423,6 +1421,9 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
mmap_read_lock(mm);
}
start = untagged_addr_remote(mm, start);
end = start + len;
blk_start_plug(&plug);
error = madvise_walk_vmas(mm, start, end, behavior,
madvise_vma_behavior);

View File

@ -2099,15 +2099,18 @@ static int do_move_pages_to_node(struct mm_struct *mm,
* target node
* 1 - when it has been queued
*/
static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
static int add_page_for_migration(struct mm_struct *mm, const void __user *p,
int node, struct list_head *pagelist, bool migrate_all)
{
struct vm_area_struct *vma;
unsigned long addr;
struct page *page;
int err;
bool isolated;
mmap_read_lock(mm);
addr = (unsigned long)untagged_addr_remote(mm, p);
err = -EFAULT;
vma = vma_lookup(mm, addr);
if (!vma || !vma_migratable(vma))
@ -2213,7 +2216,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
for (i = start = 0; i < nr_pages; i++) {
const void __user *p;
unsigned long addr;
int node;
err = -EFAULT;
@ -2221,7 +2223,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
goto out_flush;
if (get_user(node, nodes + i))
goto out_flush;
addr = (unsigned long)untagged_addr(p);
err = -ENODEV;
if (node < 0 || node >= MAX_NUMNODES)
@ -2249,8 +2250,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
* Errors in the page lookup or isolation are not fatal and we simply
* report them via status
*/
err = add_page_for_migration(mm, addr, current_node,
&pagelist, flags & MPOL_MF_MOVE_ALL);
err = add_page_for_migration(mm, p, current_node, &pagelist,
flags & MPOL_MF_MOVE_ALL);
if (err > 0) {
/* The page is successfully queued for migration */

View File

@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
corrupt_xstate_header amx
corrupt_xstate_header amx lam
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall

File diff suppressed because it is too large Load Diff