From 0ad6e3c5199be12c9745da8f8b9e3c9f8066c235 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 21 Sep 2014 20:41:53 +0200 Subject: [PATCH 1/5] x86: Speed up ___preempt_schedule*() by using THUNK helpers ___preempt_schedule() does SAVE_ALL/RESTORE_ALL but this is suboptimal, we do not need to save/restore the callee-saved register. And we already have arch/x86/lib/thunk_*.S which implements the similar asm wrappers, so it makes sense to redefine ___preempt_schedule() as "THUNK ..." and remove preempt.S altogether. Signed-off-by: Oleg Nesterov Reviewed-by: Andy Lutomirski Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140921184153.GA23727@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 -- arch/x86/kernel/preempt.S | 25 ------------------------- arch/x86/lib/thunk_32.S | 20 ++++++++++++++++---- arch/x86/lib/thunk_64.S | 7 +++++++ 4 files changed, 23 insertions(+), 31 deletions(-) delete mode 100644 arch/x86/kernel/preempt.S diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ada2e2d6be3e..8f1e77440b2b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -39,8 +39,6 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o -obj-$(CONFIG_PREEMPT) += preempt.o - obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S deleted file mode 100644 index ca7f0d58a87d..000000000000 --- a/arch/x86/kernel/preempt.S +++ /dev/null @@ -1,25 +0,0 @@ - -#include -#include -#include -#include - -ENTRY(___preempt_schedule) - CFI_STARTPROC - SAVE_ALL - call preempt_schedule - RESTORE_ALL - ret - CFI_ENDPROC - -#ifdef CONFIG_CONTEXT_TRACKING - -ENTRY(___preempt_schedule_context) - CFI_STARTPROC - SAVE_ALL - call preempt_schedule_context - RESTORE_ALL - ret - CFI_ENDPROC - -#endif diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 28f85c916712..e9acf5f4fc92 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -7,16 +7,19 @@ #include #include -#ifdef CONFIG_TRACE_IRQFLAGS /* put return address in eax (arg1) */ - .macro thunk_ra name,func + .macro THUNK name, func, put_ret_addr_in_eax=0 .globl \name \name: pushl %eax pushl %ecx pushl %edx + + .if \put_ret_addr_in_eax /* Place EIP in the arg1 */ movl 3*4(%esp), %eax + .endif + call \func popl %edx popl %ecx @@ -25,6 +28,15 @@ _ASM_NOKPROBE(\name) .endm - thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller - thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller +#ifdef CONFIG_TRACE_IRQFLAGS + THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 + THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 #endif + +#ifdef CONFIG_PREEMPT + THUNK ___preempt_schedule, preempt_schedule +#ifdef CONFIG_CONTEXT_TRACKING + THUNK ___preempt_schedule_context, preempt_schedule_context +#endif +#endif + diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index 92d9feaff42b..b30b5ebd614a 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -38,6 +38,13 @@ THUNK lockdep_sys_exit_thunk,lockdep_sys_exit #endif +#ifdef CONFIG_PREEMPT + THUNK ___preempt_schedule, preempt_schedule +#ifdef CONFIG_CONTEXT_TRACKING + THUNK ___preempt_schedule_context, preempt_schedule_context +#endif +#endif + /* SAVE_ARGS below is used only for the .cfi directives it contains. */ CFI_STARTPROC SAVE_ARGS From 212be3b2320bcf33eff648bc4e1f0edbf4d90acf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 21 Sep 2014 20:42:32 +0200 Subject: [PATCH 2/5] x86/lib/Makefile: Remove the unnecessary "+= thunk_64.o" Trivial. We have "lib-y += thunk_$(BITS).o" at the start, no need to add thunk_64.o if !CONFIG_X86_32. Signed-off-by: Oleg Nesterov Acked-by: Andy Lutomirski Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140921184232.GB23727@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 4d4f96a27638..66127b919a99 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -39,7 +39,7 @@ endif else obj-y += iomap_copy_64.o lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o - lib-y += thunk_64.o clear_page_64.o copy_page_64.o + lib-y += clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o copy_user_nocache_64.o lib-y += cmpxchg16b_emu.o From 3f63572187f5ae6a0a9e5ebee88b57e6f71c3cd4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 24 Sep 2014 08:37:00 +0100 Subject: [PATCH 3/5] x86: Improve cmpxchg16b_emu.S - don't include unneeded headers - don't open-code PER_CPU_VAR() - drop redundant entry point label - complete unwind annotations - use .L prefix on local label to not clutter the symbol table Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/542290BC020000780003807D@mail.emea.novell.com Signed-off-by: Thomas Gleixner --- arch/x86/lib/cmpxchg16b_emu.S | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 1e572c507d06..40a172541ee2 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -6,15 +6,8 @@ * */ #include -#include -#include #include - -#ifdef CONFIG_SMP -#define SEG_PREFIX %gs: -#else -#define SEG_PREFIX -#endif +#include .text @@ -39,24 +32,25 @@ CFI_STARTPROC # *atomic* on a single cpu (as provided by the this_cpu_xx class of # macros). # -this_cpu_cmpxchg16b_emu: - pushf + pushfq_cfi cli - cmpq SEG_PREFIX(%rsi), %rax - jne not_same - cmpq SEG_PREFIX 8(%rsi), %rdx - jne not_same + cmpq PER_CPU_VAR((%rsi)), %rax + jne .Lnot_same + cmpq PER_CPU_VAR(8(%rsi)), %rdx + jne .Lnot_same - movq %rbx, SEG_PREFIX(%rsi) - movq %rcx, SEG_PREFIX 8(%rsi) + movq %rbx, PER_CPU_VAR((%rsi)) + movq %rcx, PER_CPU_VAR(8(%rsi)) - popf + CFI_REMEMBER_STATE + popfq_cfi mov $1, %al ret - not_same: - popf + CFI_RESTORE_STATE +.Lnot_same: + popfq_cfi xor %al,%al ret From 5f1d919a8ca15f450c749227bc5e2e18f3cbfdb4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 24 Sep 2014 08:40:14 +0100 Subject: [PATCH 4/5] x86: Improve cmpxchg8b_emu.S - don't include unneeded headers - drop redundant entry point label - complete unwind annotations - use .L prefix on local labels to not clutter the symbol table Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/5422917E0200007800038081@mail.emea.novell.com Signed-off-by: Thomas Gleixner --- arch/x86/lib/cmpxchg8b_emu.S | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 828cb710dec2..b4807fce5177 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -7,11 +7,8 @@ */ #include -#include -#include #include - .text /* @@ -30,27 +27,28 @@ CFI_STARTPROC # set the whole ZF thing (caller will just compare # eax:edx with the expected value) # -cmpxchg8b_emu: - pushfl + pushfl_cfi cli cmpl (%esi), %eax - jne not_same + jne .Lnot_same cmpl 4(%esi), %edx - jne half_same + jne .Lhalf_same movl %ebx, (%esi) movl %ecx, 4(%esi) - popfl + CFI_REMEMBER_STATE + popfl_cfi ret - not_same: + CFI_RESTORE_STATE +.Lnot_same: movl (%esi), %eax - half_same: +.Lhalf_same: movl 4(%esi), %edx - popfl + popfl_cfi ret CFI_ENDPROC From f74954f01ec9bb2004bcc24f247d1f26f1063ad2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 24 Sep 2014 08:41:30 +0100 Subject: [PATCH 5/5] x86: Unwind-annotate thunk_32.S Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/542291CA0200007800038085@mail.emea.novell.com Signed-off-by: Thomas Gleixner --- arch/x86/lib/thunk_32.S | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index e9acf5f4fc92..e28cdaf5ac2c 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -6,14 +6,19 @@ */ #include #include + #include /* put return address in eax (arg1) */ .macro THUNK name, func, put_ret_addr_in_eax=0 .globl \name \name: - pushl %eax - pushl %ecx - pushl %edx + CFI_STARTPROC + pushl_cfi %eax + CFI_REL_OFFSET eax, 0 + pushl_cfi %ecx + CFI_REL_OFFSET ecx, 0 + pushl_cfi %edx + CFI_REL_OFFSET edx, 0 .if \put_ret_addr_in_eax /* Place EIP in the arg1 */ @@ -21,10 +26,14 @@ .endif call \func - popl %edx - popl %ecx - popl %eax + popl_cfi %edx + CFI_RESTORE edx + popl_cfi %ecx + CFI_RESTORE ecx + popl_cfi %eax + CFI_RESTORE eax ret + CFI_ENDPROC _ASM_NOKPROBE(\name) .endm