diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 8cc918acbabc..a0533e672496 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -83,7 +83,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) */ __must_check unsigned long -clear_user_original(void __user *addr, unsigned long len); +rep_stos_alternative(void __user *addr, unsigned long len); static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size) { @@ -97,7 +97,7 @@ static __always_inline __must_check unsigned long __clear_user(void __user *addr asm volatile( "1:\n\t" ALTERNATIVE("rep stosb", - "call clear_user_original", ALT_NOT(X86_FEATURE_FSRS)) + "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS)) "2:\n" _ASM_EXTABLE_UA(1b, 2b) : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index fcd01b9f8d50..f74a3e704a1c 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -57,59 +57,85 @@ EXPORT_SYMBOL_GPL(clear_page_erms) * Input: * rdi destination * rcx count + * rax is zero * * Output: * rcx: uncleared bytes or 0 if successful. */ -SYM_FUNC_START(clear_user_original) - /* - * Copy only the lower 32 bits of size as that is enough to handle the rest bytes, - * i.e., no need for a 'q' suffix and thus a REX prefix. - */ - mov %ecx,%eax - shr $3,%rcx - jz .Lrest_bytes +SYM_FUNC_START(rep_stos_alternative) + cmpq $64,%rcx + jae .Lunrolled - # do the qwords first - .p2align 4 -.Lqwords: - movq $0,(%rdi) - lea 8(%rdi),%rdi - dec %rcx - jnz .Lqwords + cmp $8,%ecx + jae .Lword -.Lrest_bytes: - and $7, %eax - jz .Lexit + testl %ecx,%ecx + je .Lexit - # now do the rest bytes -.Lbytes: - movb $0,(%rdi) +.Lclear_user_tail: +0: movb %al,(%rdi) inc %rdi - dec %eax - jnz .Lbytes - + dec %rcx + jnz .Lclear_user_tail .Lexit: + RET + + _ASM_EXTABLE_UA( 0b, .Lexit) + +.Lword: +1: movq %rax,(%rdi) + addq $8,%rdi + sub $8,%ecx + je .Lexit + cmp $8,%ecx + jae .Lword + jmp .Lclear_user_tail + + .p2align 4 +.Lunrolled: +10: movq %rax,(%rdi) +11: movq %rax,8(%rdi) +12: movq %rax,16(%rdi) +13: movq %rax,24(%rdi) +14: movq %rax,32(%rdi) +15: movq %rax,40(%rdi) +16: movq %rax,48(%rdi) +17: movq %rax,56(%rdi) + addq $64,%rdi + subq $64,%rcx + cmpq $64,%rcx + jae .Lunrolled + cmpl $8,%ecx + jae .Lword + testl %ecx,%ecx + jne .Lclear_user_tail + RET + /* - * %rax still needs to be cleared in the exception case because this function is called - * from inline asm and the compiler expects %rax to be zero when exiting the inline asm, - * in case it might reuse it somewhere. + * If we take an exception on any of the + * word stores, we know that %rcx isn't zero, + * so we can just go to the tail clearing to + * get the exact count. + * + * The unrolled case might end up clearing + * some bytes twice. Don't care. + * + * We could use the value in %rdi to avoid + * a second fault on the exact count case, + * but do we really care? No. + * + * Finally, we could try to align %rdi at the + * top of the unrolling. But unaligned stores + * just aren't that common or expensive. */ - xor %eax,%eax - RET - -.Lqwords_exception: - # convert remaining qwords back into bytes to return to caller - shl $3, %rcx - and $7, %eax - add %rax,%rcx - jmp .Lexit - -.Lbytes_exception: - mov %eax,%ecx - jmp .Lexit - - _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception) - _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception) -SYM_FUNC_END(clear_user_original) -EXPORT_SYMBOL(clear_user_original) + _ASM_EXTABLE_UA( 1b, .Lclear_user_tail) + _ASM_EXTABLE_UA(10b, .Lclear_user_tail) + _ASM_EXTABLE_UA(11b, .Lclear_user_tail) + _ASM_EXTABLE_UA(12b, .Lclear_user_tail) + _ASM_EXTABLE_UA(13b, .Lclear_user_tail) + _ASM_EXTABLE_UA(14b, .Lclear_user_tail) + _ASM_EXTABLE_UA(15b, .Lclear_user_tail) + _ASM_EXTABLE_UA(16b, .Lclear_user_tail) + _ASM_EXTABLE_UA(17b, .Lclear_user_tail) +SYM_FUNC_END(rep_stos_alternative) +EXPORT_SYMBOL(rep_stos_alternative) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 44817bbe48fe..ac96c9939cd1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1284,7 +1284,7 @@ static const char *uaccess_safe_builtin[] = { "copy_mc_fragile_handle_tail", "copy_mc_enhanced_fast_string", "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ - "clear_user_original", + "rep_stos_alternative", "copy_user_generic_unrolled", "__copy_user_nocache", NULL