x86: don't use REP_GOOD or ERMS for small memory copies

The modern target to use is FSRM (Fast Short REP MOVS), and the other
cases should only be used for bigger areas (ie mainly things like page
copying and clearing).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds 2023-04-15 11:47:06 -07:00
parent 6a8f57ae2e
commit 68674f94ff
1 changed files with 10 additions and 24 deletions

View File

@ -10,13 +10,6 @@
.section .noinstr.text, "ax"
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/
/*
* memcpy - Copy a memory block.
*
@ -27,17 +20,21 @@
*
* Output:
* rax original destination
*
* The FSRM alternative should be done inline (avoiding the call and
* the disgusting return handling), but that would require some help
* from the compiler for better calling conventions.
*
* The 'rep movsb' itself is small enough to replace the call, but the
* two register moves blow up the code. And one of them is "needed"
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
SYM_TYPED_FUNC_START(__memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
movq %rdi, %rax
movq %rdx, %rcx
shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
rep movsb
RET
SYM_FUNC_END(__memcpy)
@ -46,17 +43,6 @@ EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
/*
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
RET
SYM_FUNC_END(memcpy_erms)
SYM_FUNC_START_LOCAL(memcpy_orig)
movq %rdi, %rax