linux-stable/arch/powerpc/kernel/ptrace/ptrace.c

451 lines
12 KiB
C
Raw Normal View History

/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
* Derived from "arch/m68k/kernel/ptrace.c"
* Copyright (C) 1994 by Hamish Macdonald
* Taken from linux/kernel/ptrace.c and modified for M680x0.
* linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
*
* Modified by Cort Dougan (cort@hq.fsmlabs.com)
* and Paul Mackerras (paulus@samba.org).
*
* This file is subject to the terms and conditions of the GNU General
* Public License. See the file README.legal in the main directory of
* this archive for more details.
*/
#include <linux/regset.h>
#include <linux/ptrace.h>
#include <linux/audit.h>
#include <linux/context_tracking.h>
#include <linux/syscalls.h>
#include <asm/switch_to.h>
#include <asm/debug.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
#include "ptrace-decl.h"
/*
* Called by kernel/ptrace.c when detaching..
*
* Make sure single step bits etc are not set.
*/
void ptrace_disable(struct task_struct *child)
{
/* make sure the single step bit is not set. */
user_disable_single_step(child);
}
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
{
int ret = -EPERM;
void __user *datavp = (void __user *) data;
unsigned long __user *datalp = datavp;
switch (request) {
/* read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
unsigned long index, tmp;
ret = -EIO;
/* convert to index and check */
index = addr / sizeof(long);
if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
break;
if (index < PT_FPR0)
ret = ptrace_get_reg(child, (int) index, &tmp);
else
ret = ptrace_get_fpr(child, index, &tmp);
if (ret)
break;
ret = put_user(tmp, datalp);
break;
}
/* write the word at location addr in the USER area */
case PTRACE_POKEUSR: {
unsigned long index;
ret = -EIO;
/* convert to index and check */
index = addr / sizeof(long);
if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
break;
if (index < PT_FPR0)
ret = ptrace_put_reg(child, index, data);
else
ret = ptrace_put_fpr(child, index, data);
break;
}
case PPC_PTRACE_GETHWDBGINFO: {
struct ppc_debug_info dbginfo;
ppc_gethwdinfo(&dbginfo);
if (copy_to_user(datavp, &dbginfo,
sizeof(struct ppc_debug_info)))
return -EFAULT;
return 0;
}
case PPC_PTRACE_SETHWDEBUG: {
struct ppc_hw_breakpoint bp_info;
if (copy_from_user(&bp_info, datavp,
sizeof(struct ppc_hw_breakpoint)))
return -EFAULT;
return ppc_set_hwdebug(child, &bp_info);
}
case PPC_PTRACE_DELHWDEBUG: {
ret = ppc_del_hwdebug(child, data);
break;
}
case PTRACE_GET_DEBUGREG:
ret = ptrace_get_debugreg(child, addr, datalp);
break;
case PTRACE_SET_DEBUGREG:
ret = ptrace_set_debugreg(child, addr, data);
break;
[POWERPC] ptrace updates & new, better requests The powerpc ptrace interface is dodgy at best. We have defined our "own" versions of GETREGS/SETREGS/GETFPREGS/SETFPREGS that strangely take arguments in reverse order from other archs (in addition to having different request numbers) and have subtle issue, like not accessing all of the registers in their respective categories. This patch moves the implementation of those to a separate function in order to facilitate their deprecation in the future, and provides new ptrace requests that mirror the x86 and sparc ones and use the same numbers: PTRACE_GETREGS : returns an entire pt_regs (the whole thing, not only the 32 GPRs, though that doesn't include the FPRs etc... There's a compat version for 32 bits that returns a 32 bits compatible pt_regs (44 uints) PTRACE_SETREGS : sets an entire pt_regs (the whole thing, not only the 32 GPRs, though that doesn't include the FPRs etc... Some registers cannot be written to and will just be dropped, this is the same as with POKEUSR, that is anything above MQ on 32 bits and CCR on 64 bits. There is a compat version as well. PTRACE_GETFPREGS : returns all the FP registers -including- the FPSCR that is 33 doubles (regardless of 32/64 bits) PTRACE_SETFPREGS : sets all the FP registers -including- the FPSCR that is 33 doubles (regardless of 32/64 bits) And two that only exist on 64 bits kernels: PTRACE_GETREGS64 : Same as PTRACE_GETREGS, except there is no compat function, a 32 bits process will obtain the full 64 bits registers PTRACE_SETREGS64 : Same as PTRACE_SETREGS, except there is no compat function, a 32 bits process will set the full 64 bits registers The two later ones makes things easier to have a 32 bits debugger on a 64 bits program (or on a 32 bits program that uses the full 64 bits of the GPRs, which is possible though has issues that will be fixed in a later patch). Finally, while at it, the patch removes a whole bunch of code duplication between ptrace32.c and ptrace.c, in large part by having the former call into the later for all requests that don't need any special "compat" treatment. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-04 05:15:43 +00:00
#ifdef CONFIG_PPC64
case PTRACE_GETREGS64:
#endif
case PTRACE_GETREGS: /* Get all pt_regs from the child. */
return copy_regset_to_user(child, &user_ppc_native_view,
REGSET_GPR,
0, sizeof(struct user_pt_regs),
datavp);
[POWERPC] ptrace updates & new, better requests The powerpc ptrace interface is dodgy at best. We have defined our "own" versions of GETREGS/SETREGS/GETFPREGS/SETFPREGS that strangely take arguments in reverse order from other archs (in addition to having different request numbers) and have subtle issue, like not accessing all of the registers in their respective categories. This patch moves the implementation of those to a separate function in order to facilitate their deprecation in the future, and provides new ptrace requests that mirror the x86 and sparc ones and use the same numbers: PTRACE_GETREGS : returns an entire pt_regs (the whole thing, not only the 32 GPRs, though that doesn't include the FPRs etc... There's a compat version for 32 bits that returns a 32 bits compatible pt_regs (44 uints) PTRACE_SETREGS : sets an entire pt_regs (the whole thing, not only the 32 GPRs, though that doesn't include the FPRs etc... Some registers cannot be written to and will just be dropped, this is the same as with POKEUSR, that is anything above MQ on 32 bits and CCR on 64 bits. There is a compat version as well. PTRACE_GETFPREGS : returns all the FP registers -including- the FPSCR that is 33 doubles (regardless of 32/64 bits) PTRACE_SETFPREGS : sets all the FP registers -including- the FPSCR that is 33 doubles (regardless of 32/64 bits) And two that only exist on 64 bits kernels: PTRACE_GETREGS64 : Same as PTRACE_GETREGS, except there is no compat function, a 32 bits process will obtain the full 64 bits registers PTRACE_SETREGS64 : Same as PTRACE_SETREGS, except there is no compat function, a 32 bits process will set the full 64 bits registers The two later ones makes things easier to have a 32 bits debugger on a 64 bits program (or on a 32 bits program that uses the full 64 bits of the GPRs, which is possible though has issues that will be fixed in a later patch). Finally, while at it, the patch removes a whole bunch of code duplication between ptrace32.c and ptrace.c, in large part by having the former call into the later for all requests that don't need any special "compat" treatment. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-06-04 05:15:43 +00:00
#ifdef CONFIG_PPC64
case PTRACE_SETREGS64:
#endif
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child, &user_ppc_native_view,
REGSET_GPR,
0, sizeof(struct user_pt_regs),
datavp);
case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
return copy_regset_to_user(child, &user_ppc_native_view,
REGSET_FPR,
0, sizeof(elf_fpregset_t),
datavp);
case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
return copy_regset_from_user(child, &user_ppc_native_view,
REGSET_FPR,
0, sizeof(elf_fpregset_t),
datavp);
#ifdef CONFIG_ALTIVEC
case PTRACE_GETVRREGS:
return copy_regset_to_user(child, &user_ppc_native_view,
REGSET_VMX,
0, (33 * sizeof(vector128) +
sizeof(u32)),
datavp);
case PTRACE_SETVRREGS:
return copy_regset_from_user(child, &user_ppc_native_view,
REGSET_VMX,
0, (33 * sizeof(vector128) +
sizeof(u32)),
datavp);
#endif
#ifdef CONFIG_VSX
case PTRACE_GETVSRREGS:
return copy_regset_to_user(child, &user_ppc_native_view,
REGSET_VSX,
0, 32 * sizeof(double),
datavp);
case PTRACE_SETVSRREGS:
return copy_regset_from_user(child, &user_ppc_native_view,
REGSET_VSX,
0, 32 * sizeof(double),
datavp);
#endif
#ifdef CONFIG_SPE
case PTRACE_GETEVRREGS:
/* Get the child spe register state. */
return copy_regset_to_user(child, &user_ppc_native_view,
REGSET_SPE, 0, 35 * sizeof(u32),
datavp);
case PTRACE_SETEVRREGS:
/* Set the child spe register state. */
return copy_regset_from_user(child, &user_ppc_native_view,
REGSET_SPE, 0, 35 * sizeof(u32),
datavp);
#endif
default:
ret = ptrace_request(child, request, addr, data);
break;
}
return ret;
}
#ifdef CONFIG_SECCOMP
static int do_seccomp(struct pt_regs *regs)
{
if (!test_thread_flag(TIF_SECCOMP))
return 0;
/*
* The ABI we present to seccomp tracers is that r3 contains
* the syscall return value and orig_gpr3 contains the first
* syscall parameter. This is different to the ptrace ABI where
* both r3 and orig_gpr3 contain the first syscall parameter.
*/
regs->gpr[3] = -ENOSYS;
/*
* We use the __ version here because we have already checked
* TIF_SECCOMP. If this fails, there is nothing left to do, we
* have already loaded -ENOSYS into r3, or seccomp has put
* something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
*/
if (__secure_computing(NULL))
return -1;
/*
* The syscall was allowed by seccomp, restore the register
* state to what audit expects.
* Note that we use orig_gpr3, which means a seccomp tracer can
* modify the first syscall parameter (in orig_gpr3) and also
* allow the syscall to proceed.
*/
regs->gpr[3] = regs->orig_gpr3;
return 0;
}
#else
static inline int do_seccomp(struct pt_regs *regs) { return 0; }
#endif /* CONFIG_SECCOMP */
/**
* do_syscall_trace_enter() - Do syscall tracing on kernel entry.
* @regs: the pt_regs of the task to trace (current)
*
* Performs various types of tracing on syscall entry. This includes seccomp,
* ptrace, syscall tracepoints and audit.
*
* The pt_regs are potentially visible to userspace via ptrace, so their
* contents is ABI.
*
* One or more of the tracers may modify the contents of pt_regs, in particular
* to modify arguments or even the syscall number itself.
*
* It's also possible that a tracer can choose to reject the system call. In
* that case this function will return an illegal syscall number, and will put
* an appropriate return value in regs->r3.
*
* Return: the (possibly changed) syscall number.
*/
long do_syscall_trace_enter(struct pt_regs *regs)
{
u32 flags;
flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
if (flags) {
int rc = ptrace_report_syscall_entry(regs);
if (unlikely(flags & _TIF_SYSCALL_EMU)) {
/*
* A nonzero return code from
* ptrace_report_syscall_entry() tells us to prevent
* the syscall execution, but we are not going to
* execute it anyway.
*
* Returning -1 will skip the syscall execution. We want
* to avoid clobbering any registers, so we don't goto
* the skip label below.
*/
return -1;
}
if (rc) {
/*
* The tracer decided to abort the syscall. Note that
* the tracer may also just change regs->gpr[0] to an
* invalid syscall number, that is handled below on the
* exit path.
*/
goto skip;
}
}
/* Run seccomp after ptrace; allow it to set gpr[3]. */
if (do_seccomp(regs))
return -1;
/* Avoid trace and audit when syscall is invalid. */
if (regs->gpr[0] >= NR_syscalls)
goto skip;
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->gpr[0]);
if (!is_32bit_task())
audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
regs->gpr[5], regs->gpr[6]);
else
audit_syscall_entry(regs->gpr[0],
regs->gpr[3] & 0xffffffff,
regs->gpr[4] & 0xffffffff,
regs->gpr[5] & 0xffffffff,
regs->gpr[6] & 0xffffffff);
/* Return the possibly modified but valid syscall number */
return regs->gpr[0];
skip:
/*
* If we are aborting explicitly, or if the syscall number is
* now invalid, set the return value to -ENOSYS.
*/
regs->gpr[3] = -ENOSYS;
return -1;
}
void do_syscall_trace_leave(struct pt_regs *regs)
{
int step;
Audit: push audit success and retcode into arch ptrace.h The audit system previously expected arches calling to audit_syscall_exit to supply as arguments if the syscall was a success and what the return code was. Audit also provides a helper AUDITSC_RESULT which was supposed to simplify things by converting from negative retcodes to an audit internal magic value stating success or failure. This helper was wrong and could indicate that a valid pointer returned to userspace was a failed syscall. The fix is to fix the layering foolishness. We now pass audit_syscall_exit a struct pt_reg and it in turns calls back into arch code to collect the return value and to determine if the syscall was a success or failure. We also define a generic is_syscall_success() macro which determines success/failure based on if the value is < -MAX_ERRNO. This works for arches like x86 which do not use a separate mechanism to indicate syscall failure. We make both the is_syscall_success() and regs_return_value() static inlines instead of macros. The reason is because the audit function must take a void* for the regs. (uml calls theirs struct uml_pt_regs instead of just struct pt_regs so audit_syscall_exit can't take a struct pt_regs). Since the audit function takes a void* we need to use static inlines to cast it back to the arch correct structure to dereference it. The other major change is that on some arches, like ia64, MIPS and ppc, we change regs_return_value() to give us the negative value on syscall failure. THE only other user of this macro, kretprobe_example.c, won't notice and it makes the value signed consistently for the audit functions across all archs. In arch/sh/kernel/ptrace_64.c I see that we were using regs[9] in the old audit code as the return value. But the ptrace_64.h code defined the macro regs_return_value() as regs[3]. I have no idea which one is correct, but this patch now uses the regs_return_value() function, so it now uses regs[3]. For powerpc we previously used regs->result but now use the regs_return_value() function which uses regs->gprs[3]. regs->gprs[3] is always positive so the regs_return_value(), much like ia64 makes it negative before calling the audit code when appropriate. Signed-off-by: Eric Paris <eparis@redhat.com> Acked-by: H. Peter Anvin <hpa@zytor.com> [for x86 portion] Acked-by: Tony Luck <tony.luck@intel.com> [for ia64] Acked-by: Richard Weinberger <richard@nod.at> [for uml] Acked-by: David S. Miller <davem@davemloft.net> [for sparc] Acked-by: Ralf Baechle <ralf@linux-mips.org> [for mips] Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [for ppc]
2012-01-03 19:23:06 +00:00
audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->result);
step = test_thread_flag(TIF_SINGLESTEP);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
ptrace_report_syscall_exit(regs, step);
}
void __init pt_regs_check(void);
/*
* Dummy function, its purpose is to break the build if struct pt_regs and
* struct user_pt_regs don't match.
*/
void __init pt_regs_check(void)
{
BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
offsetof(struct user_pt_regs, gpr));
BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
offsetof(struct user_pt_regs, nip));
BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
offsetof(struct user_pt_regs, msr));
BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
offsetof(struct user_pt_regs, orig_gpr3));
BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
offsetof(struct user_pt_regs, ctr));
BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
offsetof(struct user_pt_regs, link));
BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
offsetof(struct user_pt_regs, xer));
BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
offsetof(struct user_pt_regs, ccr));
#ifdef __powerpc64__
BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
offsetof(struct user_pt_regs, softe));
#else
BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
offsetof(struct user_pt_regs, mq));
#endif
BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
offsetof(struct user_pt_regs, trap));
BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
offsetof(struct user_pt_regs, dar));
BUILD_BUG_ON(offsetof(struct pt_regs, dear) !=
offsetof(struct user_pt_regs, dar));
BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
offsetof(struct user_pt_regs, dsisr));
BUILD_BUG_ON(offsetof(struct pt_regs, esr) !=
offsetof(struct user_pt_regs, dsisr));
BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
offsetof(struct user_pt_regs, result));
BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
// Now check that the pt_regs offsets match the uapi #defines
#define CHECK_REG(_pt, _reg) \
BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
sizeof(unsigned long)));
CHECK_REG(PT_R0, gpr[0]);
CHECK_REG(PT_R1, gpr[1]);
CHECK_REG(PT_R2, gpr[2]);
CHECK_REG(PT_R3, gpr[3]);
CHECK_REG(PT_R4, gpr[4]);
CHECK_REG(PT_R5, gpr[5]);
CHECK_REG(PT_R6, gpr[6]);
CHECK_REG(PT_R7, gpr[7]);
CHECK_REG(PT_R8, gpr[8]);
CHECK_REG(PT_R9, gpr[9]);
CHECK_REG(PT_R10, gpr[10]);
CHECK_REG(PT_R11, gpr[11]);
CHECK_REG(PT_R12, gpr[12]);
CHECK_REG(PT_R13, gpr[13]);
CHECK_REG(PT_R14, gpr[14]);
CHECK_REG(PT_R15, gpr[15]);
CHECK_REG(PT_R16, gpr[16]);
CHECK_REG(PT_R17, gpr[17]);
CHECK_REG(PT_R18, gpr[18]);
CHECK_REG(PT_R19, gpr[19]);
CHECK_REG(PT_R20, gpr[20]);
CHECK_REG(PT_R21, gpr[21]);
CHECK_REG(PT_R22, gpr[22]);
CHECK_REG(PT_R23, gpr[23]);
CHECK_REG(PT_R24, gpr[24]);
CHECK_REG(PT_R25, gpr[25]);
CHECK_REG(PT_R26, gpr[26]);
CHECK_REG(PT_R27, gpr[27]);
CHECK_REG(PT_R28, gpr[28]);
CHECK_REG(PT_R29, gpr[29]);
CHECK_REG(PT_R30, gpr[30]);
CHECK_REG(PT_R31, gpr[31]);
CHECK_REG(PT_NIP, nip);
CHECK_REG(PT_MSR, msr);
CHECK_REG(PT_ORIG_R3, orig_gpr3);
CHECK_REG(PT_CTR, ctr);
CHECK_REG(PT_LNK, link);
CHECK_REG(PT_XER, xer);
CHECK_REG(PT_CCR, ccr);
#ifdef CONFIG_PPC64
CHECK_REG(PT_SOFTE, softe);
#else
CHECK_REG(PT_MQ, mq);
#endif
CHECK_REG(PT_TRAP, trap);
CHECK_REG(PT_DAR, dar);
CHECK_REG(PT_DSISR, dsisr);
CHECK_REG(PT_RESULT, result);
#undef CHECK_REG
BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
/*
* PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
* real registers.
*/
BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
powerpc/32: Fix overread/overwrite of thread_struct via ptrace The ptrace PEEKUSR/POKEUSR (aka PEEKUSER/POKEUSER) API allows a process to read/write registers of another process. To get/set a register, the API takes an index into an imaginary address space called the "USER area", where the registers of the process are laid out in some fashion. The kernel then maps that index to a particular register in its own data structures and gets/sets the value. The API only allows a single machine-word to be read/written at a time. So 4 bytes on 32-bit kernels and 8 bytes on 64-bit kernels. The way floating point registers (FPRs) are addressed is somewhat complicated, because double precision float values are 64-bit even on 32-bit CPUs. That means on 32-bit kernels each FPR occupies two word-sized locations in the USER area. On 64-bit kernels each FPR occupies one word-sized location in the USER area. Internally the kernel stores the FPRs in an array of u64s, or if VSX is enabled, an array of pairs of u64s where one half of each pair stores the FPR. Which half of the pair stores the FPR depends on the kernel's endianness. To handle the different layouts of the FPRs depending on VSX/no-VSX and big/little endian, the TS_FPR() macro was introduced. Unfortunately the TS_FPR() macro does not take into account the fact that the addressing of each FPR differs between 32-bit and 64-bit kernels. It just takes the index into the "USER area" passed from userspace and indexes into the fp_state.fpr array. On 32-bit there are 64 indexes that address FPRs, but only 32 entries in the fp_state.fpr array, meaning the user can read/write 256 bytes past the end of the array. Because the fp_state sits in the middle of the thread_struct there are various fields than can be overwritten, including some pointers. As such it may be exploitable. It has also been observed to cause systems to hang or otherwise misbehave when using gdbserver, and is probably the root cause of this report which could not be easily reproduced: https://lore.kernel.org/linuxppc-dev/dc38afe9-6b78-f3f5-666b-986939e40fc6@keymile.com/ Rather than trying to make the TS_FPR() macro even more complicated to fix the bug, or add more macros, instead add a special-case for 32-bit kernels. This is more obvious and hopefully avoids a similar bug happening again in future. Note that because 32-bit kernels never have VSX enabled the code doesn't need to consider TS_FPRWIDTH/OFFSET at all. Add a BUILD_BUG_ON() to ensure that 32-bit && VSX is never enabled. Fixes: 87fec0514f61 ("powerpc: PTRACE_PEEKUSR/PTRACE_POKEUSER of FPR registers in little endian builds") Cc: stable@vger.kernel.org # v3.13+ Reported-by: Ariel Miculas <ariel.miculas@belden.com> Tested-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220609133245.573565-1-mpe@ellerman.id.au
2022-06-06 14:34:56 +00:00
// ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible
BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX));
}