x86: simplify load_unaligned_zeropad() implementation

The exception for the "unaligned access at the end of the page, next
page not mapped" never happens, but the fixup code ends up causing
trouble for compilers to optimize well.

clang in particular ends up seeing it being in the middle of a loop, and
tries desperately to optimize the exception fixup code that is never
really reached.

The simple solution is to just move all the fixups into the exception
handler itself, which moves it all out of the hot case code, and means
that the compiler never sees it or needs to worry about it.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds 2022-08-14 14:16:13 -07:00
parent 415d832497
commit c4e34dd99f
3 changed files with 60 additions and 43 deletions

View File

@ -64,4 +64,6 @@
#define EX_TYPE_UCOPY_LEN4 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4))
#define EX_TYPE_UCOPY_LEN8 (EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8))
#define EX_TYPE_ZEROPAD 20 /* longword load with zeropad on fault */
#endif

View File

@ -77,58 +77,18 @@ static inline unsigned long find_zero(unsigned long mask)
* and the next page not being mapped, take the exception and
* return zeroes in the non-existing part.
*/
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/*
 * Read one unsigned long from @addr with a single 'mov'.
 *
 * The exception table entry attached to the load names the do_exception
 * label; the code at that label rebuilds the value from the aligned long
 * that contains @addr.
 *
 * NOTE(review): both an "asm_volatile_goto(" and an "asm volatile(" opener
 * appear below, which looks like the removed and added lines of a diff
 * shown together. Confirm against the real header before treating this
 * block as compilable code.
 */
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long offset, data;
unsigned long ret;
asm_volatile_goto(
asm volatile(
"1: mov %[mem], %[ret]\n"
_ASM_EXTABLE(1b, %l[do_exception])
: [ret] "=r" (ret)
: [mem] "m" (*(unsigned long *)addr)
: : do_exception);
return ret;
do_exception:
/* Byte offset of addr inside its aligned long. */
offset = (unsigned long)addr & (sizeof(long) - 1);
/* Round addr down to the start of that aligned long. */
addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
data = *(unsigned long *)addr;
/*
 * Shift out the bytes that precede the requested address; the unsigned
 * right shift fills the top 'offset' bytes of the result with zero.
 */
ret = data >> offset * 8;
return ret;
}
#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
/*
 * Read one unsigned long from @addr with a single 'mov'; this is the
 * definition on the !CONFIG_CC_HAS_ASM_GOTO_OUTPUT side of the #else.
 *
 * Label 2 sits right after the load and is the fixup target of the
 * exception table entries for label 1.
 *
 * NOTE(review): two exception table entries (_ASM_EXTABLE_FAULT and
 * _ASM_EXTABLE_TYPE) and two output-constraint lists appear below, which
 * looks like the removed and added lines of a diff shown together.
 * Confirm against the real header before treating this block as
 * compilable code.
 */
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long offset, data;
unsigned long ret, err = 0;
asm( "1: mov %[mem], %[ret]\n"
"2:\n"
_ASM_EXTABLE_FAULT(1b, 2b)
: [ret] "=&r" (ret), "+a" (err)
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD)
: [ret] "=r" (ret)
: [mem] "m" (*(unsigned long *)addr));
/*
 * err starts at 0 and is only an asm operand through the "+a" constraint;
 * presumably the _ASM_EXTABLE_FAULT fixup leaves a nonzero value in it
 * when the load faults -- TODO confirm.
 */
if (unlikely(err)) {
/* Byte offset of addr inside its aligned long. */
offset = (unsigned long)addr & (sizeof(long) - 1);
/* Round addr down to the start of that aligned long. */
addr = (void *)((unsigned long)addr & ~(sizeof(long) - 1));
data = *(unsigned long *)addr;
/*
 * Shift out the bytes that precede the requested address; the unsigned
 * right shift fills the top 'offset' bytes of the result with zero.
 */
ret = data >> offset * 8;
}
return ret;
}
#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
#endif /* _ASM_WORD_AT_A_TIME_H */

View File

@ -41,6 +41,59 @@ static bool ex_handler_default(const struct exception_table_entry *e,
return true;
}
/*
 * Fixup for the *very* rare case of a "load_unaligned_zeropad()" that
 * crosses a page boundary into a page that does not exist.
 *
 * The situation arises when a pathname is optimistically loaded a word
 * at a time, the name is shorter than a full word, and the next page is
 * not mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC.
 *
 * NOTE! The faulting instruction is always a 'mov mem,reg' of size
 * 'long', and the exception fixup address must always be the address
 * immediately following that instruction.
 *
 * Returns false when the faulting instruction or fault address does not
 * match that pattern; otherwise stores the zero-padded value in the
 * instruction's destination register and returns the result of
 * ex_handler_default().
 */
static bool ex_handler_zeropad(const struct exception_table_entry *e,
			       struct pt_regs *regs,
			       unsigned long fault_addr)
{
	const unsigned long align_mask = sizeof(long) - 1;
	unsigned long insn_len, mem_addr, word_base, byte_off;
	unsigned long *dest;
	struct insn insn;

	/* Distance from the faulting ip to the fixup is the expected length. */
	insn_len = ex_fixup_addr(e) - regs->ip;
	if (insn_len > MAX_INSN_SIZE)
		return false;

	/* Decode must succeed and consume exactly that many bytes. */
	if (insn_decode(&insn, (void *) regs->ip, insn_len, INSN_MODE_KERN) ||
	    insn.length != insn_len)
		return false;

	/* Accept only opcode 0x8b with a long-sized operand. */
	if (insn.opcode.bytes[0] != 0x8b || insn.opnd_bytes != sizeof(long))
		return false;

	mem_addr = (unsigned long) insn_get_addr_ref(&insn, regs);
	if (mem_addr == ~0ul)
		return false;

	byte_off = mem_addr & align_mask;
	word_base = mem_addr & ~align_mask;

	/* The fault has to be at the start of the next aligned long. */
	if (fault_addr != word_base + sizeof(long))
		return false;

	dest = insn_get_modrm_reg_ptr(&insn, regs);
	if (!dest)
		return false;

	/* Aligned load, shifted so the bytes past the word read as zero. */
	*dest = *(unsigned long *)word_base >> (byte_off * 8);

	return ex_handler_default(e, regs);
}
static bool ex_handler_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
@ -217,6 +270,8 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
return ex_handler_sgx(e, regs, trapnr);
case EX_TYPE_UCOPY_LEN:
return ex_handler_ucopy_len(e, regs, trapnr, reg, imm);
case EX_TYPE_ZEROPAD:
return ex_handler_zeropad(e, regs, fault_addr);
}
BUG();
}