x86/lib/memcpy_64.S: Convert memcpy to ALTERNATIVE_2 macro

Make REP_GOOD variant the default after alternatives have run.

Signed-off-by: Borislav Petkov <bp@suse.de>
Author: Borislav Petkov <bp@suse.de>
Date:   2015-02-04 15:36:49 +01:00
parent a77600cd03
commit e0bc8d179e
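
The change below replaces the old self-patching .altinstructions entries with a single ALTERNATIVE_2 site: memcpy now starts with a jump to memcpy_orig, which the alternatives patcher NOPs out on REP_GOOD CPUs (so execution falls through to the REP MOVSQ body) or rewrites into a jump to memcpy_erms on ERMS CPUs. For readers who would rather not decode the AT&T assembly, here is a minimal userspace mirror of that REP_GOOD body, i.e. of the variant that is now the default: copy len/8 quadwords with REP MOVSQ, then the remaining 0-7 bytes with REP MOVSB. The function name, wrapper and test harness are ours purely for illustration; only the instruction sequence comes from the routine in the diff.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/*
 * Userspace mirror of the REP_GOOD body from the diff: copy len/8
 * quadwords with "rep movsq", then the remaining len%8 bytes with
 * "rep movsb".  Function name and wrapper are ours; only the
 * instruction sequence mirrors the kernel routine.
 */
static void *rep_good_copy(void *dst, const void *src, size_t len)
{
        void *ret = dst;                /* memcpy returns the original destination */
        size_t quads = len >> 3;        /* shrq $3, %rcx */
        size_t tail  = len & 7;         /* andl $7, %edx */

        asm volatile("rep movsq"
                     : "+D" (dst), "+S" (src), "+c" (quads)
                     : /* no inputs */
                     : "memory");
        asm volatile("rep movsb"
                     : "+D" (dst), "+S" (src), "+c" (tail)
                     : /* no inputs */
                     : "memory");
        return ret;
}

int main(void)
{
        char src[20] = "0123456789abcdefghi";
        char dst[20] = { 0 };

        rep_good_copy(dst, src, sizeof(src));
        puts(dst);
        return memcmp(dst, src, sizeof(src)) != 0;
}

Build with something like gcc -O2 on an x86-64 machine. The kernel routine does the same work; the difference is that the REP_GOOD/ERMS decision is baked in once, by patching the code in place at boot, rather than taken per call.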


--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,11 +1,19 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -17,15 +25,11 @@
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+        ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+                      "jmp memcpy_erms", X86_FEATURE_ERMS
 
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-        .section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
         movq %rdi, %rax
         movq %rdx, %rcx
         shrq $3, %rcx
@@ -34,29 +38,21 @@
         movl %edx, %ecx
         rep movsb
         ret
-.Lmemcpy_e:
-        .previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-        .section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
         movq %rdi, %rax
         movq %rdx, %rcx
         rep movsb
         ret
-.Lmemcpy_e_e:
-        .previous
-
-.weak memcpy
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENDPROC(memcpy_erms)
+
+ENTRY(memcpy_orig)
         CFI_STARTPROC
         movq %rdi, %rax
@@ -183,26 +179,4 @@ ENTRY(memcpy)
 .Lend:
         retq
         CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-/*
- * Some CPUs are adding enhanced REP MOVSB/STOSB feature
- * If the feature is supported, memcpy_c_e() is the first choice.
- * If enhanced rep movsb copy is not available, use fast string copy
- * memcpy_c() when possible. This is faster and code is simpler than
- * original memcpy().
- * Otherwise, original memcpy() is used.
- * In .altinstructions section, ERMS feature is placed after REG_GOOD
- * feature to implement the right patch order.
- *
- * Replace only beginning, memcpy is used to apply alternatives,
- * so it is silly to overwrite itself with nops - reboot is the
- * only outcome...
- */
-        .section .altinstructions, "a"
-        altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-                             .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c,0
-        altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-                             .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e,0
-        .previous
+ENDPROC(memcpy_orig)
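
A note on checking which way a given machine would go: ERMS is an architectural CPUID bit (leaf 7, subleaf 0, EBX bit 9, reported as "erms" in /proc/cpuinfo), while REP_GOOD is a flag the kernel synthesizes per CPU model, so only the ERMS half of the decision is visible from userspace. A small sketch of that check, our illustration rather than kernel code:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* The maximum standard CPUID leaf must cover leaf 7. */
        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx) || eax < 7) {
                puts("CPUID leaf 7 not supported");
                return 1;
        }

        /* Structured extended feature flags: ERMS is EBX bit 9. */
        __cpuid_count(7, 0, eax, ebx, ecx, edx);

        if (ebx & (1u << 9))
                puts("ERMS set: the memcpy entry would be patched to 'jmp memcpy_erms'");
        else
                puts("ERMS clear: memcpy falls through to the REP_GOOD body or jumps to memcpy_orig");

        return 0;
}

On CPUs with ERMS, the alternatives patcher rewrites the ALTERNATIVE_2 site into jmp memcpy_erms; on REP_GOOD CPUs the initial jump is NOPped out and the REP MOVSQ body runs; everything else keeps the jump to memcpy_orig.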