powerpc/dexcr: Move HASHCHK trap handler

Syzkaller reported a sleep in atomic context bug relating to the HASHCHK
handler logic:

  BUG: sleeping function called from invalid context at arch/powerpc/kernel/traps.c:1518
  in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 25040, name: syz-executor
  preempt_count: 0, expected: 0
  RCU nest depth: 0, expected: 0
  no locks held by syz-executor/25040.
  irq event stamp: 34
  hardirqs last  enabled at (33): [<c000000000048b38>] prep_irq_for_enabled_exit arch/powerpc/kernel/interrupt.c:56 [inline]
  hardirqs last  enabled at (33): [<c000000000048b38>] interrupt_exit_user_prepare_main+0x148/0x600 arch/powerpc/kernel/interrupt.c:230
  hardirqs last disabled at (34): [<c00000000003e6a4>] interrupt_enter_prepare+0x144/0x4f0 arch/powerpc/include/asm/interrupt.h:176
  softirqs last  enabled at (0): [<c000000000281954>] copy_process+0x16e4/0x4750 kernel/fork.c:2436
  softirqs last disabled at (0): [<0000000000000000>] 0x0
  CPU: 15 PID: 25040 Comm: syz-executor Not tainted 6.5.0-rc5-00001-g3ccdff6bb06d #3
  Hardware name: IBM,9105-22A POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1040.00 (NL1040_021) hv:phyp pSeries
  Call Trace:
  [c0000000a8247ce0] [c00000000032b0e4] __might_resched+0x3b4/0x400 kernel/sched/core.c:10189
  [c0000000a8247d80] [c0000000008c7dc8] __might_fault+0xa8/0x170 mm/memory.c:5853
  [c0000000a8247dc0] [c00000000004160c] do_program_check+0x32c/0xb20 arch/powerpc/kernel/traps.c:1518
  [c0000000a8247e50] [c000000000009b2c] program_check_common_virt+0x3bc/0x3c0

To determine if a trap was caused by a HASHCHK instruction, we inspect
the user instruction that triggered the trap. However this may sleep
if the page needs to be faulted in (get_user_instr() reaches
__get_user(), which calls might_fault() and triggers the bug message).

Move the HASHCHK handler logic to after we allow IRQs, which is fine
because we are only interested in HASHCHK if it's a user space trap.

Fixes: 5bcba4e6c1 ("powerpc/dexcr: Handle hashchk exception")
Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20230915034604.45393-1-bgray@linux.ibm.com
This commit is contained in:
Benjamin Gray 2023-09-15 13:46:04 +10:00 committed by Michael Ellerman
parent 6901a9f9ef
commit c3f4309693

View file

@ -1512,23 +1512,11 @@ static void do_program_check(struct pt_regs *regs)
return;
}
if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && user_mode(regs)) {
ppc_inst_t insn;
if (get_user_instr(insn, (void __user *)regs->nip)) {
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
return;
}
if (ppc_inst_primary_opcode(insn) == 31 &&
get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) {
_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
return;
}
/* User mode considers other cases after enabling IRQs */
if (!user_mode(regs)) {
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
return;
}
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
return;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (reason & REASON_TM) {
@ -1561,16 +1549,44 @@ static void do_program_check(struct pt_regs *regs)
/*
* If we took the program check in the kernel skip down to sending a
* SIGILL. The subsequent cases all relate to emulating instructions
* which we should only do for userspace. We also do not want to enable
* interrupts for kernel faults because that might lead to further
* faults, and loose the context of the original exception.
* SIGILL. The subsequent cases all relate to user space, such as
* emulating instructions which we should only do for user space. We
* also do not want to enable interrupts for kernel faults because that
* might lead to further faults, and loose the context of the original
* exception.
*/
if (!user_mode(regs))
goto sigill;
interrupt_cond_local_irq_enable(regs);
/*
* (reason & REASON_TRAP) is mostly handled before enabling IRQs,
* except get_user_instr() can sleep so we cannot reliably inspect the
* current instruction in that context. Now that we know we are
* handling a user space trap and can sleep, we can check if the trap
* was a hashchk failure.
*/
if (reason & REASON_TRAP) {
if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
ppc_inst_t insn;
if (get_user_instr(insn, (void __user *)regs->nip)) {
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
return;
}
if (ppc_inst_primary_opcode(insn) == 31 &&
get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) {
_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
return;
}
}
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
return;
}
/* (reason & REASON_ILLEGAL) would be the obvious thing here,
* but there seems to be a hardware bug on the 405GP (RevD)
* that means ESR is sometimes set incorrectly - either to