mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-11-01 17:08:10 +00:00
3e3786801b
set_fs() sets the addr_limit, which is used in access_ok() to
determine if an address is a user or kernel address.
Some code paths use set_fs() to temporarily elevate the addr_limit so
that kernel code can read/write kernel memory as if it were user
memory. That is fine as long as the code can't ever return to
userspace with the addr_limit still elevated.
If that did happen, userspace could read/write kernel memory as if
it were user memory, e.g. just with write(2). In case it's not clear,
that is very bad. It has also happened in the past due to bugs.
Commit 5ea0727b16 ("x86/syscalls: Check address limit on user-mode
return") added a mechanism to check the addr_limit value before
returning to userspace. Any call to set_fs() sets a thread flag,
TIF_FSCHECK, and if we see that on the return to userspace we go out
of line to check that the addr_limit value is not elevated.
For further info see the above commit, as well as:
https://lwn.net/Articles/722267/
https://bugs.chromium.org/p/project-zero/issues/detail?id=990
Verified to work on 64-bit Book3S using a POC that objdumps the system
call handler, and a modified lkdtm_CORRUPT_USER_DS() that doesn't kill
the caller.
Before:
$ sudo ./test-tif-fscheck
...
0000000000000000 <.data>:
0: e1 f7 8a 79 rldicl. r10,r12,30,63
4: 80 03 82 40 bne 0x384
8: 00 40 8a 71 andi. r10,r12,16384
c: 78 0b 2a 7c mr r10,r1
10: 10 fd 21 38 addi r1,r1,-752
14: 08 00 c2 41 beq- 0x1c
18: 58 09 2d e8 ld r1,2392(r13)
1c: 00 00 41 f9 std r10,0(r1)
20: 70 01 61 f9 std r11,368(r1)
24: 78 01 81 f9 std r12,376(r1)
28: 70 00 01 f8 std r0,112(r1)
2c: 78 00 41 f9 std r10,120(r1)
30: 20 00 82 41 beq 0x50
34: a6 42 4c 7d mftb r10
After:
$ sudo ./test-tif-fscheck
Killed
And in dmesg:
Invalid address limit on user-mode return
WARNING: CPU: 1 PID: 3689 at ../include/linux/syscalls.h:260 do_notify_resume+0x140/0x170
...
NIP [c00000000001ee50] do_notify_resume+0x140/0x170
LR [c00000000001ee4c] do_notify_resume+0x13c/0x170
Call Trace:
do_notify_resume+0x13c/0x170 (unreliable)
ret_from_except_lite+0x70/0x74
Performance overhead is essentially zero in the usual case, because
the bit is checked as part of the existing _TIF_USER_WORK_MASK check.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
375 lines
10 KiB
C
375 lines
10 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ARCH_POWERPC_UACCESS_H
|
|
#define _ARCH_POWERPC_UACCESS_H
|
|
|
|
#include <asm/asm-compat.h>
|
|
#include <asm/ppc_asm.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/page.h>
|
|
#include <asm/extable.h>
|
|
|
|
/*
 * The fs value determines whether argument validity checking should be
 * performed or not.  If get_fs() == USER_DS, checking is performed, with
 * get_fs() == KERNEL_DS, checking is bypassed.
 *
 * For historical reasons, these macros are grossly misnamed.
 *
 * The fs/ds values are now the highest legal address in the "segment".
 * This simplifies the checking in the routines below.
 */

/* Wrap a raw address limit in an mm_segment_t. */
#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })

/* Limit with every bit set: no address can exceed it. */
#define KERNEL_DS	MAKE_MM_SEG(~0UL)
#ifdef __powerpc64__
/* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
#define USER_DS		MAKE_MM_SEG(TASK_SIZE_USER64 - 1)
#else
#define USER_DS		MAKE_MM_SEG(TASK_SIZE - 1)
#endif

#define get_ds()	(KERNEL_DS)
/* Address limit stored in the current task's thread struct. */
#define get_fs()	(current->thread.addr_limit)
|
|
|
|
/*
 * Install @fs as the current task's address limit.
 *
 * Every call also raises TIF_FSCHECK, so the limit is re-checked before
 * the task next returns to user mode.
 */
static inline void set_fs(mm_segment_t fs)
{
	current->thread.addr_limit = fs;
	/* On user-mode return check addr_limit (fs) is correct */
	set_thread_flag(TIF_FSCHECK);
}
|
|
|
|
/* True when the two address limits hold the same value. */
#define segment_eq(a, b)	((a).seg == (b).seg)

/* Raw value of the address limit currently returned by get_fs(). */
#define user_addr_max()	(get_fs().seg)
|
|
|
|
#ifdef __powerpc64__
|
|
/*
 * This check is sufficient because there is a large enough
 * gap between user addresses and the kernel addresses.
 *
 * Note that addr and size are each compared against the limit on their
 * own; addr + size is never formed, so the range end is not tested
 * directly and the sum cannot wrap.
 */
#define __access_ok(addr, size, segment)	\
	(((addr) <= (segment).seg) && ((size) <= (segment).seg))
|
|
|
|
#else
|
|
|
|
static inline int __access_ok(unsigned long addr, unsigned long size,
|
|
mm_segment_t seg)
|
|
{
|
|
if (addr > seg.seg)
|
|
return 0;
|
|
return (size == 0 || size - 1 <= seg.seg - addr);
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
 * Check the range [addr, addr + size) against the current address limit
 * from get_fs().  @type is accepted but not used by this implementation.
 */
#define access_ok(type, addr, size)		\
	(__chk_user_ptr(addr),			\
	 __access_ok((__force unsigned long)(addr), (size), get_fs()))
|
|
|
|
/*
 * These are the main single-value transfer routines.  They automatically
 * use the right size if we just have the right pointer type.
 *
 * This gets kind of ugly. We want to return _two_ values in "get_user()"
 * and yet we don't want to do any pointers, because that is too much
 * of a performance impact. Thus we have a few rather ugly macros here,
 * and hide all the ugliness from the user.
 *
 * The "__xxx" versions of the user access functions are versions that
 * do not verify the address space, that must have been done previously
 * with a separate "access_ok()" call (this is used when we do multiple
 * accesses to the same area of user memory).
 *
 * As we use the same address space for kernel and user data on the
 * PowerPC, we can just do these as direct assignments.  (Of course, the
 * exception handling means that it's no longer "just"...)
 *
 * Every wrapper below passes sizeof(*(ptr)) as the access size; the
 * put variants first cast the value to the pointed-to type.
 */
#define get_user(x, ptr) \
	__get_user_check((x), (ptr), sizeof(*(ptr)))
#define put_user(x, ptr) \
	__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))

#define __get_user(x, ptr) \
	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
#define __put_user(x, ptr) \
	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))

#define __get_user_inatomic(x, ptr) \
	__get_user_nosleep((x), (ptr), sizeof(*(ptr)))
#define __put_user_inatomic(x, ptr) \
	__put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
|
|
|
|
/*
 * Called from the default (unsupported size) branch of __put_user_size().
 * NOTE(review): no definition is visible in this header; presumably it is
 * left undefined so an unsupported size fails at link time -- confirm.
 */
extern long __put_user_bad(void);
|
|
|
|
/*
 * We don't tell gcc that we are accessing memory, but this is OK
 * because we do not write to any memory gcc knows about, so there
 * are no aliasing issues.
 *
 * Store x at addr with the store instruction named by op.  EX_TABLE
 * pairs the store at label 1 with the fixup at label 3, which loads
 * -EFAULT into err and branches back to label 2, just past the store.
 * err is tied to its own input ("0"), so it keeps the caller's value
 * when the fixup does not run.
 */
#define __put_user_asm(x, addr, err, op)			\
	__asm__ __volatile__(					\
		"1: " op " %1,0(%2) # put_user\n"		\
		"2:\n"						\
		".section .fixup,\"ax\"\n"			\
		"3: li %0,%3\n"					\
		" b 2b\n"					\
		".previous\n"					\
		EX_TABLE(1b, 3b)				\
		: "=r" (err)					\
		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
|
|
|
|
#ifdef __powerpc64__
/* 64-bit: an 8-byte store is a single std through __put_user_asm(). */
#define __put_user_asm2(x, ptr, retval)				\
	__put_user_asm(x, ptr, retval, "std")
#else /* __powerpc64__ */
/*
 * 32-bit: an 8-byte store is two stw instructions, taking operand %1
 * and the register after it (%1+1) and writing offsets 0 and 4 from
 * addr.  Both stores share the fixup at label 4, which loads -EFAULT
 * into err and resumes at label 3.
 */
#define __put_user_asm2(x, addr, err)				\
	__asm__ __volatile__(					\
		"1: stw %1,0(%2)\n"				\
		"2: stw %1+1,4(%2)\n"				\
		"3:\n"						\
		".section .fixup,\"ax\"\n"			\
		"4: li %0,%3\n"					\
		" b 3b\n"					\
		".previous\n"					\
		EX_TABLE(1b, 4b)				\
		EX_TABLE(2b, 4b)				\
		: "=r" (err)					\
		: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
#endif /* __powerpc64__ */
|
|
|
|
/*
 * Store x at ptr using the store that matches size (1, 2, 4 or 8 bytes).
 * retval is cleared first and is only changed again if the store faults.
 * Any other size ends up calling __put_user_bad().
 */
#define __put_user_size(x, ptr, size, retval)			\
do {								\
	retval = 0;						\
	switch (size) {						\
	case 1: __put_user_asm(x, ptr, retval, "stb"); break;	\
	case 2: __put_user_asm(x, ptr, retval, "sth"); break;	\
	case 4: __put_user_asm(x, ptr, retval, "stw"); break;	\
	case 8: __put_user_asm2(x, ptr, retval); break;		\
	default: __put_user_bad();				\
	}							\
} while (0)
|
|
|
|
/*
 * Store x at ptr without an access_ok() check; evaluates to the error
 * value left by __put_user_size() (0 when the store did not fault).
 * might_fault() is skipped when the target is a kernel address.
 */
#define __put_user_nocheck(x, ptr, size)			\
({								\
	long __pu_err;						\
	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
	if (!is_kernel_addr((unsigned long)__pu_addr))		\
		might_fault();					\
	__chk_user_ptr(ptr);					\
	__put_user_size((x), __pu_addr, (size), __pu_err);	\
	__pu_err;						\
})
|
|
|
|
/*
 * Store x at ptr after validating the destination with access_ok().
 * Evaluates to -EFAULT when the range is rejected (the store is then
 * not attempted), otherwise to the error value from __put_user_size().
 */
#define __put_user_check(x, ptr, size)					\
({									\
	long __pu_err = -EFAULT;					\
	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
	might_fault();							\
	if (access_ok(VERIFY_WRITE, __pu_addr, (size)))			\
		__put_user_size((x), __pu_addr, (size), __pu_err);	\
	__pu_err;							\
})
|
|
|
|
/*
 * Store x at ptr with neither an access_ok() check nor a might_fault()
 * call; evaluates to the error value left by __put_user_size().
 */
#define __put_user_nosleep(x, ptr, size)			\
({								\
	long __pu_err;						\
	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
	__chk_user_ptr(ptr);					\
	__put_user_size((x), __pu_addr, (size), __pu_err);	\
	__pu_err;						\
})
|
|
|
|
|
|
/*
 * Called by __get_user_size() when the requested size is unsupported or
 * larger than the destination.
 * NOTE(review): no definition is visible in this header; presumably it is
 * left undefined so such uses fail at link time -- confirm.
 */
extern long __get_user_bad(void);
|
|
|
|
/*
 * This does an atomic, aligned 128-bit (16-byte) load from userspace:
 * one lvx from uaddr into vector register 0, which is then written to
 * kaddr with stvx.
 * Up to the caller to do enable_kernel_vmx() before calling!
 *
 * Only the lvx at label 1 has an exception-table entry; on a fault the
 * fixup loads -EFAULT into err and resumes at label 2, skipping the stvx.
 *
 * The stvx writes kernel memory that the compiler may be tracking, so
 * the statement carries a "memory" clobber.
 */
#define __get_user_atomic_128_aligned(kaddr, uaddr, err)		\
	__asm__ __volatile__(						\
		"1: lvx 0,0,%1 # get user\n"				\
		" stvx 0,0,%2 # put kernel\n"				\
		"2:\n"							\
		".section .fixup,\"ax\"\n"				\
		"3: li %0,%3\n"						\
		" b 2b\n"						\
		".previous\n"						\
		EX_TABLE(1b, 3b)					\
		: "=r" (err)						\
		: "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err)	\
		: "memory")
|
|
|
|
/*
 * Load x from addr with the load instruction named by op.  If the load
 * at label 1 faults, the fixup at label 3 loads -EFAULT into err, zeroes
 * x, and resumes at label 2; otherwise err keeps the caller's value.
 */
#define __get_user_asm(x, addr, err, op)		\
	__asm__ __volatile__(				\
		"1: "op" %1,0(%2) # get_user\n"		\
		"2:\n"					\
		".section .fixup,\"ax\"\n"		\
		"3: li %0,%3\n"				\
		" li %1,0\n"				\
		" b 2b\n"				\
		".previous\n"				\
		EX_TABLE(1b, 3b)			\
		: "=r" (err), "=r" (x)			\
		: "b" (addr), "i" (-EFAULT), "0" (err))
|
|
|
|
#ifdef __powerpc64__
/* 64-bit: an 8-byte load is a single ld through __get_user_asm(). */
#define __get_user_asm2(x, addr, err)			\
	__get_user_asm(x, addr, err, "ld")
#else /* __powerpc64__ */
/*
 * 32-bit: an 8-byte load is two lwz instructions into operand %1 and
 * the register after it (%1+1).  Both loads share the fixup at label 4,
 * which loads -EFAULT into err and zeroes both halves of x.  x is an
 * early-clobber output ("=&r") because its first half is written while
 * addr is still needed for the second load.
 */
#define __get_user_asm2(x, addr, err)			\
	__asm__ __volatile__(				\
		"1: lwz %1,0(%2)\n"			\
		"2: lwz %1+1,4(%2)\n"			\
		"3:\n"					\
		".section .fixup,\"ax\"\n"		\
		"4: li %0,%3\n"				\
		" li %1,0\n"				\
		" li %1+1,0\n"				\
		" b 3b\n"				\
		".previous\n"				\
		EX_TABLE(1b, 4b)			\
		EX_TABLE(2b, 4b)			\
		: "=r" (err), "=&r" (x)			\
		: "b" (addr), "i" (-EFAULT), "0" (err))
#endif /* __powerpc64__ */
|
|
|
|
/*
 * Load size bytes (1, 2, 4 or 8) from ptr into x.  retval is cleared
 * first and is only changed again if the load faults.  A size larger
 * than x, or any unsupported size, ends up calling __get_user_bad().
 */
#define __get_user_size(x, ptr, size, retval)			\
do {								\
	retval = 0;						\
	__chk_user_ptr(ptr);					\
	if ((size) > sizeof(x))					\
		(x) = __get_user_bad();				\
	switch (size) {						\
	case 1: __get_user_asm(x, ptr, retval, "lbz"); break;	\
	case 2: __get_user_asm(x, ptr, retval, "lhz"); break;	\
	case 4: __get_user_asm(x, ptr, retval, "lwz"); break;	\
	case 8: __get_user_asm2(x, ptr, retval); break;		\
	default: (x) = __get_user_bad();			\
	}							\
} while (0)
|
|
|
|
/*
 * Load a value from ptr into x without an access_ok() check; evaluates
 * to the error value left by __get_user_size() (0 when the load did not
 * fault).  might_fault() is skipped when the source is a kernel address.
 *
 * The value is staged in an unsigned long and then cast to the
 * pointed-to type; __force keeps that cast consistent with the _check
 * and _nosleep variants.
 */
#define __get_user_nocheck(x, ptr, size)			\
({								\
	long __gu_err;						\
	unsigned long __gu_val;					\
	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
	__chk_user_ptr(ptr);					\
	if (!is_kernel_addr((unsigned long)__gu_addr))		\
		might_fault();					\
	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
	(x) = (__force __typeof__(*(ptr)))__gu_val;		\
	__gu_err;						\
})
|
|
|
|
/*
 * Load a value from ptr into x after validating the source with
 * access_ok().  When the range is rejected the load is not attempted:
 * x receives 0 (the initial __gu_val) and the result is -EFAULT.
 * Otherwise the result is the error value from __get_user_size().
 */
#define __get_user_check(x, ptr, size)					\
({									\
	long __gu_err = -EFAULT;					\
	unsigned long __gu_val = 0;					\
	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
	might_fault();							\
	if (access_ok(VERIFY_READ, __gu_addr, (size)))			\
		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
	__gu_err;							\
})
|
|
|
|
/*
 * Load a value from ptr into x with neither an access_ok() check nor a
 * might_fault() call; evaluates to the error value left by
 * __get_user_size().
 */
#define __get_user_nosleep(x, ptr, size)			\
({								\
	long __gu_err;						\
	unsigned long __gu_val;					\
	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
	__chk_user_ptr(ptr);					\
	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
	(x) = (__force __typeof__(*(ptr)))__gu_val;		\
	__gu_err;						\
})
|
|
|
|
|
|
/* more complex routines */
|
|
|
|
extern unsigned long __copy_tofrom_user(void __user *to,
|
|
const void __user *from, unsigned long size);
|
|
|
|
#ifdef __powerpc64__
|
|
static inline unsigned long
|
|
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
|
|
{
|
|
return __copy_tofrom_user(to, from, n);
|
|
}
|
|
#endif /* __powerpc64__ */
|
|
|
|
static inline unsigned long raw_copy_from_user(void *to,
|
|
const void __user *from, unsigned long n)
|
|
{
|
|
if (__builtin_constant_p(n) && (n <= 8)) {
|
|
unsigned long ret = 1;
|
|
|
|
switch (n) {
|
|
case 1:
|
|
__get_user_size(*(u8 *)to, from, 1, ret);
|
|
break;
|
|
case 2:
|
|
__get_user_size(*(u16 *)to, from, 2, ret);
|
|
break;
|
|
case 4:
|
|
__get_user_size(*(u32 *)to, from, 4, ret);
|
|
break;
|
|
case 8:
|
|
__get_user_size(*(u64 *)to, from, 8, ret);
|
|
break;
|
|
}
|
|
if (ret == 0)
|
|
return 0;
|
|
}
|
|
|
|
return __copy_tofrom_user((__force void __user *)to, from, n);
|
|
}
|
|
|
|
/*
 * Copy @n bytes from the kernel buffer @from to user memory at @to.
 *
 * When @n is a compile-time constant of 1, 2, 4 or 8, a single store via
 * __put_user_size() is tried first and 0 is returned if it succeeds.
 * In every other case -- including a fault on that single store -- the
 * copy is handed to __copy_tofrom_user() and its result is returned.
 */
static inline unsigned long raw_copy_to_user(void __user *to,
		const void *from, unsigned long n)
{
	if (__builtin_constant_p(n) && (n <= 8)) {
		/* Stays non-zero for sizes the switch does not handle. */
		unsigned long ret = 1;

		switch (n) {
		case 1:
			__put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret);
			break;
		case 2:
			__put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret);
			break;
		case 4:
			__put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret);
			break;
		case 8:
			__put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret);
			break;
		}
		if (ret == 0)
			return 0;
	}

	return __copy_tofrom_user(to, (__force const void __user *)from, n);
}
|
|
|
|
/*
 * Unchecked worker behind clear_user(), which returns this function's
 * result once access_ok() has passed.
 * NOTE(review): defined outside this header; the result is presumably
 * the number of bytes that could not be cleared -- confirm.
 */
extern unsigned long __clear_user(void __user *addr, unsigned long size);
|
|
|
|
/*
 * Clear @size bytes of user memory starting at @addr.
 *
 * Returns the result of __clear_user() when the range passes
 * access_ok(); otherwise nothing is touched and @size is returned.
 */
static inline unsigned long clear_user(void __user *addr, unsigned long size)
{
	might_fault();
	if (likely(access_ok(VERIFY_WRITE, addr, size)))
		return __clear_user(addr, size);
	return size;
}
|
|
|
|
extern long strncpy_from_user(char *dst, const char __user *src, long count);
|
|
extern __must_check long strnlen_user(const char __user *str, long n);
|
|
|
|
extern long __copy_from_user_flushcache(void *dst, const void __user *src,
|
|
unsigned size);
|
|
extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
|
|
size_t len);
|
|
|
|
#endif /* _ARCH_POWERPC_UACCESS_H */
|